# This file implements a parser for html snippets, meant to be used by a
# WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
# or <body> tags, nor does it produce the top level "document" node in the dom
-# tree, nor nodes for html, head or body.
+# tree, nor nodes for html, head or body. Comments containing "fixfull"
+# indicate places where additional code is needed for full HTML document
+# parsing.
#
# Instead, the data structure produced by this parser is an array of nodes.
#
ret += JSON.stringify @name
ret += ','
ret += JSON.stringify @attrs
- ret += ','
- sep = '['
+ ret += ',['
+ sep = ''
for c in @children
ret += sep
sep = ','
tok_cur_tag = null # partially parsed tag
flag_frameset_ok = null
flag_parsing = null
+ flag_foster_parenting = null
afe = [] # active formatting elements
parse_error = ->
# Create
loop
el = afe[i].shallow_clone()
- tree_insert_tag el
+ tree_insert_element el
afe[i] = el
break if i is 0
i -= 1
# 14. Insert whatever last node ended up being in the previous step
# at the appropriate place for inserting a node, but using common
# ancestor as the override target.
- tree_insert_tag last_node, ca
+ tree_insert_element last_node, ca
# 15. Create an element for the token for which formatting element
# was created, in the HTML namespace, with furthest block as the
# intended parent.
# TODO pop stack until 'p' popped
# http://www.w3.org/TR/html5/syntax.html#insert-a-character
- tree_insert_a_character = (t) ->
- # FIXME read spec for "adjusted insertion location, etc, this might be wrong
- dest = open_els[0].children
- if dest.length > 0 and dest[dest.length - 1].type is TYPE_TEXT
- dest[dest.length - 1].text += t.text
+ tree_insert_text = (t) -> # insert text token t at the adjusted insertion location, merging with a preceding text node when there is one
+ dest = adjusted_insertion_location() # two-element array: [parent node, index into parent.children]
+ if dest[1] > 0
+ prev = dest[0].children[dest[1] - 1] # the node immediately before the insertion point
+ if prev.type is TYPE_TEXT # extend the existing text node instead of adding a sibling
+ prev.text += t.text
+ return
+ dest[0].children.splice dest[1], 0, t
+
+ # 8.2.5.1
+ # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
+ # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
+ #
+ # Computes where the next node should be inserted.
+ # override_target: optional node to use instead of the current node
+ # (open_els[0]).
+ # Returns a two-element array [target, target_i]: the parent node and the
+ # index into target.children at which the new node should be spliced in.
+ adjusted_insertion_location = (override_target = null) ->
+ # 1. If there was an override target specified, then let target be the
+ # override target.
+ if override_target?
+ target = override_target
+ else # Otherwise, let target be the current node.
+ target = open_els[0]
+ # 2. Determine the adjusted insertion location using the first matching
+ # steps from the following list:
+ #
+ # If foster parenting is enabled and target is a table, tbody, tfoot,
+ # thead, or tr element Foster parenting happens when content is
+ # misnested in tables.
+ if flag_foster_parenting and target.name in foster_parenting_targets
+ console.log "foster parenting isn't implemented yet" # TODO
+ # Until the substeps below are implemented, fall back to "inside target,
+ # after its last child" so that target_i is never left undefined (an
+ # undefined index makes callers splice at position 0, i.e. prepend).
+ target_i = target.children.length
+ # 1. Let last template be the last template element in the stack of
+ # open elements, if any.
+ # 2. Let last table be the last table element in the stack of open
+ # elements, if any.
+
+ # 3. If there is a last template and either there is no last table,
+ # or there is one, but last template is lower (more recently added)
+ # than last table in the stack of open elements, then: let adjusted
+ # insertion location be inside last template's template contents,
+ # after its last child (if any), and abort these substeps.
+
+ # 4. If there is no last table, then let adjusted insertion
+ # location be inside the first element in the stack of open
+ # elements (the html element), after its last child (if any), and
+ # abort these substeps. (fragment case)
+
+ # 5. If last table has a parent element, then let adjusted
+ # insertion location be inside last table's parent element,
+ # immediately before last table, and abort these substeps.
+
+ # 6. Let previous element be the element immediately above last
+ # table in the stack of open elements.
+
+ # 7. Let adjusted insertion location be inside previous element,
+ # after its last child (if any).
+
+ # Note: These steps are involved in part because it's possible for
+ # elements, the table element in this case in particular, to have
+ # been moved by a script around in the DOM, or indeed removed from
+ # the DOM entirely, after the element was inserted by the parser.
else
- dest.push t
+ # Otherwise Let adjusted insertion location be inside target, after
+ # its last child (if any).
+ target_i = target.children.length
- # FIXME read spec, do this right
- # FIXME implement the override target thing
- # note: this assumes it's an open tag
- tree_insert_tag = (t, override_target = null) ->
+ # 3. If the adjusted insertion location is inside a template element,
+ # let it instead be inside the template element's template contents,
+ # after its last child (if any). TODO
+
+ # 4. Return the adjusted insertion location.
+ return [target, target_i]
+
+ # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
+ # aka create_an_element_for_token: builds and returns a new TYPE_TAG Node carrying the token's name, the given namespace and an attrs hash built from t.attrs_a. Note that the token itself is modified (t.type is overwritten and t.attrs_a is emptied); intended_parent is not used yet.
+ token_to_element = (t, namespace, intended_parent) ->
t.type = TYPE_TAG # not TYPE_OPEN_TAG
# convert attributes into a hash
+ attrs = {}
while t.attrs_a.length
a = t.attrs_a.pop()
- t.attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
- if t.parent?
- for c, i of t.parent.children
- if c is t
- t.parent.children.splice i, 1
- # FIXME spec says to do something to figure out what parent should be
- parent = open_els[0]
- open_els.unshift t
- parent.children.push t
- t.parent = parent
+ attrs[a[0]] = a[1] # entries are popped last-first, so for a repeated name the earliest entry is written last and wins. TODO check what to do with duplicate attrs
+ el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs
+
+ # TODO 2. If the newly created element has an xmlns attribute in the
+ # XMLNS namespace whose value is not exactly the same as the element's
+ # namespace, that is a parse error. Similarly, if the newly created
+ # element has an xmlns:xlink attribute in the XMLNS namespace whose
+ # value is not the XLink Namespace, that is a parse error.
+
+ # fixfull: the spec says stuff about form pointers and ownerDocument
+
+ return el
+
+ # Insert el at the adjusted insertion location (computed with override_target), set el.parent, push el onto the front of open_els and return it.
+ # If el is still a TYPE_OPEN_TAG token it is first converted to an element with token_to_element, using namespace.
+ # FIXME implement the "foster parenting" part, read the spec and do this right, and finish the override target thing
+ # note: this assumes it's an open tag
+ # TODO tree_insert_html_element = (t, ...
+ tree_insert_element = (el, override_target = null, namespace = null) ->
+ dest = adjusted_insertion_location override_target
+ if el.type is TYPE_OPEN_TAG # means it's a "token"
+ el = token_to_element el, namespace, dest[0]
+ # fixfull: Document nodes sometimes can't accept more children
+ dest[0].children.splice dest[1], 0, el
+ el.parent = dest[0]
+ open_els.unshift el
+ return el
# http://www.w3.org/TR/html5/syntax.html#insert-a-comment
tree_insert_a_comment = (t) ->
parse_error()
when "\t", "\u000a", "\u000c", "\u000d", ' '
reconstruct_active_formatting_elements()
- tree_insert_a_character t
+ tree_insert_text t
else
reconstruct_active_formatting_elements()
- tree_insert_a_character t
+ tree_insert_text t
flag_frameset_ok = false
when TYPE_COMMENT
tree_insert_a_comment t
# TODO
when 'address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'nav', 'ol', 'p', 'section', 'summary', 'ul'
close_p_if_in_button_scope()
- tree_insert_tag t
+ tree_insert_element t
when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
close_p_if_in_button_scope()
if open_els[0].name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
parse_error()
open_els.shift()
- tree_insert_tag t
+ tree_insert_element t
# TODO lots more to implement here
when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u'
reconstruct_active_formatting_elements()
- tree_insert_tag t
- afe.push t
+ el = tree_insert_element t
+ afe.push el
# TODO lots more to implement here
else # any other start tag
reconstruct_active_formatting_elements()
- tree_insert_tag t
+ tree_insert_element t
when TYPE_EOF
ok_tags = {
dd: true, dt: true, li: true, p: true, tbody: true, td: true,
tree_state = tree_in_body
flag_frameset_ok = true
flag_parsing = true
+ flag_foster_parenting = false
afe = [] # active formatting elements
# tokenizer initialization
html: "<b><i><a><s><tt><div></b>first</b></div></tt></s></a>second</i>",
# chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]'
# firefox does this:
- expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"'
+ expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"',
errors: 6 # no idea how many there should be