+ tok_cur_tag = null # partially parsed tag
+ flag_frameset_ok = null # tree_in_body sets this to false after inserting non-whitespace text
+ flag_parsing = null # tree_in_body sets this to false on TYPE_EOF ("stop parsing")
+ flag_foster_parenting = null # read by adjusted_insertion_location
+ afe = [] # active formatting elements; reconstruct/adoption agency treat index 0 as the most recent entry
+
+ # Report a parse error: pass `cur` to parse_error_cb when a callback is
+ # set, otherwise write a message to the console.
+ parse_error = ->
+   return parse_error_cb cur if parse_error_cb?
+   console.log "Parse error at character #{cur} of #{txt.length}"
+
+
+ # the functions below implement the Tree Construction algorithm
+ # http://www.w3.org/TR/html5/syntax.html#tree-construction
+
+ # But first... the helpers
+ # True when any TYPE_TAG node named 'template' is on the stack of open
+ # elements (open_els).
+ template_tag_is_open = ->
+   for node in open_els when node.type is TYPE_TAG
+     return true if node.name is 'template'
+   false
+ # Walk open_els from index 0 (the current node) outward. Returns true as
+ # soon as a node named tag_name is found; returns false as soon as a node
+ # whose name is a key of `scope` is reached, or when the stack runs out.
+ is_in_scope_x = (tag_name, scope) ->
+   for node in open_els
+     return true if node.name is tag_name
+     return false if node.name of scope
+   false
+ # Same walk as is_in_scope_x, but a node whose name is a key of either
+ # `scope` or `scope2` terminates the search with false.
+ is_in_scope_x_y = (tag_name, scope, scope2) ->
+   for node in open_els
+     return true if node.name is tag_name
+     return false if node.name of scope or node.name of scope2
+   false
+ # Tag names that bound the default "in scope" search (used as a key set:
+ # only key membership is tested, via `of`).
+ # FIXME these are supposed to be namespace specific
+ standard_scopers = {
+   'applet': true, 'caption': true, 'html': true, 'table': true, 'td': true,
+   'th': true, 'marquee': true, 'object': true, 'template': true, 'mi': true,
+   'mo': true, 'mn': true, 'ms': true, 'mtext': true, 'annotation-xml': true,
+   'foreignObject': true, 'desc': true, 'title': true
+ }
+ # Extra scope-boundary key sets layered on top of (or replacing) the
+ # standard list by the is_in_*_scope helpers.
+ button_scopers = {button: true}
+ li_scopers = {ol: true, ul: true}
+ table_scopers = {html: true, table: true, template: true}
+ # Is an element named tag_name in scope, bounded by standard_scopers?
+ is_in_scope = (tag_name) ->
+   is_in_scope_x(tag_name, standard_scopers)
+ # Button scope: bounded by standard_scopers plus button_scopers.
+ is_in_button_scope = (tag_name) ->
+   is_in_scope_x_y(tag_name, standard_scopers, button_scopers)
+ # Table scope: bounded only by table_scopers.
+ is_in_table_scope = (tag_name) ->
+   is_in_scope_x(tag_name, table_scopers)
+ # Select scope is inverted: every node except 'optgroup' and 'option'
+ # terminates the search with false.
+ is_in_select_scope = (tag_name) ->
+   for node in open_els
+     return true if node.name is tag_name
+     return false unless node.name in ['optgroup', 'option']
+   false
+ # Like is_in_scope, but looks for one specific node (compared by identity)
+ # instead of matching on tag name.
+ el_is_in_scope = (el) ->
+   for node in open_els
+     return true if node is el
+     return false if node.name of standard_scopers
+   false
+
+ # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
+ # Follows the structure of the algorithm at the link above; the capitalized
+ # comments are that algorithm's step labels. afe[0] is treated as the most
+ # recently added entry, so "earlier" entries live at higher indexes.
+ reconstruct_active_formatting_elements = ->
+   return unless afe.length > 0
+   # an entry that is a marker or already open needs no reconstruction
+   settled = (entry) -> entry.type is TYPE_MARKER or entry in open_els
+   return if settled afe[0]
+   # Rewind
+   idx = 0
+   while idx isnt afe.length - 1
+     idx += 1
+     if settled afe[idx]
+       idx -= 1 # Advance
+       break
+   # Create
+   loop
+     clone = afe[idx].shallow_clone()
+     tree_insert_element clone
+     afe[idx] = clone
+     break if idx is 0
+     idx -= 1
+
+ # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
+ # adoption agency algorithm
+ #
+ # subject: tag name of the formatting end tag being processed.
+ #
+ # Conventions used throughout: open_els[0] is the current node, so "above"
+ # in the stack means a higher index; afe[0] is the most recently added
+ # active formatting element, so "after" in the list means a lower index.
+ # Abbreviations: fe = formatting element, fb = furthest block,
+ # ca = common ancestor.
+ adoption_agency = (subject) ->
+   # 1. Current node has the subject's name: pop it and forget it.
+   if open_els[0].name is subject
+     el = open_els[0]
+     open_els.shift()
+     # remove it from the list of active formatting elements (if found)
+     for t, i in afe
+       if t is el
+         afe.splice i, 1
+         break
+     return
+   outer = 0
+   loop
+     if outer >= 8
+       return
+     outer += 1
+     # Find the formatting element: most recent afe entry with the
+     # subject's name that comes before any marker.
+     fe = null
+     for t, fe_index in afe
+       if t.type is TYPE_MARKER
+         break
+       if t.name is subject
+         fe = t
+         break
+     if fe is null
+       in_body_any_other_end_tag subject
+       return
+     # Locate fe in the stack of open elements (index needed later for ca).
+     fe_open_index = -1
+     for t, i in open_els
+       if t is fe
+         fe_open_index = i
+         break
+     if fe_open_index is -1
+       parse_error()
+       # "remove it from the list" must mean afe, since it's not in open_els
+       afe.splice fe_index, 1
+       return
+     unless el_is_in_scope fe
+       parse_error()
+       return
+     unless open_els[0] is fe
+       parse_error()
+       # continue
+     # Furthest block: the special element closest to fe among the nodes
+     # below it in the stack (last match while walking toward fe).
+     fb = null
+     fb_index = null
+     for t, i in open_els
+       if t is fe
+         break
+       if el_is_special t
+         fb = t
+         fb_index = i
+     if fb is null
+       # No furthest block: pop up to and including fe, drop fe from afe.
+       loop
+         t = open_els.shift()
+         if t is fe
+           afe.splice fe_index, 1
+           return
+     # common ancestor: the element immediately above fe in open_els
+     # (must be indexed by fe's position in open_els, not in afe)
+     ca = open_els[fe_open_index + 1]
+     node_above = open_els[fb_index + 1] # next node if node isn't in open_els anymore
+     # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
+     bookmark = new_aaa_bookmark()
+     for t, i in afe
+       if t is fe
+         afe.splice i, 0, bookmark
+         break # insert exactly once
+     node = last_node = fb
+     inner = 0
+     loop
+       inner += 1
+       # Let node be the element immediately above node in open_els, or
+       # (if node was removed) the one that used to be above it.
+       node_next = null
+       for t, i in open_els
+         if t is node
+           node_next = open_els[i + 1]
+           break
+       node = node_next ? node_above
+       if node is fe
+         break
+       node_in_afe = false
+       for t, i in afe
+         if t is node
+           if inner > 3
+             afe.splice i, 1
+           else
+             node_in_afe = true
+           break
+       unless node_in_afe
+         for t, i in open_els
+           if t is node
+             node_above = open_els[i + 1]
+             open_els.splice i, 1
+             break
+         continue
+       # 7. Create an element for the token for which the element node
+       # was created, in the HTML namespace, with common ancestor as
+       # the intended parent; replace the entry for node in the list
+       # of active formatting elements with an entry for the new
+       # element, replace the entry for node in the stack of open
+       # elements with an entry for the new element, and let node be
+       # the new element.
+       new_node = node.shallow_clone()
+       for t, i in afe
+         if t is node
+           afe[i] = new_node
+           break
+       for t, i in open_els
+         if t is node
+           open_els[i] = new_node
+           break
+       node = new_node
+       # 8. If last node is furthest block, then move the
+       # aforementioned bookmark to be immediately after the new node
+       # in the list of active formatting elements.
+       if last_node is fb
+         for t, i in afe
+           if t is bookmark
+             afe.splice i, 1
+             break
+         for t, i in afe
+           if t is node
+             # afe[0] is the newest entry, so inserting at i (a lower
+             # index than node ends up with) is "after" node. The same
+             # bookmark object is reused so step 18 can find it.
+             afe.splice i, 0, bookmark
+             break
+       # 9. Insert last node into node, first removing it from its
+       # previous parent node if any.
+       if last_node.parent?
+         for c, i in last_node.parent.children
+           if c is last_node
+             last_node.parent.children.splice i, 1
+             break
+       node.children.push last_node
+       last_node.parent = node
+       # 10. Let last node be node.
+       last_node = node
+       # 11. Return to the step labeled inner loop.
+     # 14. Insert whatever last node ended up being in the previous step
+     # at the appropriate place for inserting a node, but using common
+     # ancestor as the override target.
+     # last_node may still be attached to the tree (when the inner loop
+     # exits immediately last_node is fb), so detach it first.
+     if last_node.parent?
+       for c, i in last_node.parent.children
+         if c is last_node
+           last_node.parent.children.splice i, 1
+           break
+     # tree_insert_element can't be used here: it would also push last_node
+     # onto open_els, where it already is and must keep its position.
+     dest = adjusted_insertion_location ca
+     dest[0].children.splice dest[1], 0, last_node
+     last_node.parent = dest[0]
+     # 15. Create an element for the token for which formatting element
+     # was created, in the HTML namespace, with furthest block as the
+     # intended parent.
+     new_element = fe.shallow_clone()
+     # 16. Take all of the child nodes of furthest block and append them
+     # to the element created in the last step.
+     while fb.children.length
+       t = fb.children.shift()
+       t.parent = new_element
+       new_element.children.push t
+     # 17. Append that new element to furthest block.
+     new_element.parent = fb
+     fb.children.push new_element
+     # 18. Remove formatting element from the list of active formatting
+     # elements, and insert the new element into the list of active
+     # formatting elements at the position of the aforementioned
+     # bookmark.
+     for t, i in afe
+       if t is fe
+         afe.splice i, 1
+         break
+     for t, i in afe
+       if t is bookmark
+         afe[i] = new_element
+         break
+     # 19. Remove formatting element from the stack of open elements,
+     # and insert the new element into the stack of open elements
+     # immediately below the position of furthest block in that stack.
+     for t, i in open_els
+       if t is fe
+         open_els.splice i, 1
+         break
+     for t, i in open_els
+       if t is fb
+         # index i puts new_element just below fb (closer to current node)
+         open_els.splice i, 0, new_element
+         break
+     # 20. Jump back to the step labeled outer loop.
+
+ # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
+ # FIXME implement this fully. For now only the simplest case is handled:
+ # when the current node is a <p>, it is popped off the stack of open
+ # elements.
+ close_p_if_in_button_scope = ->
+   if open_els[0].name is 'p'
+     # the current node lives at index 0, so it is removed with shift();
+     # pop() would remove the other end of the stack (the root)
+     open_els.shift()
+   return
+   #p = find_button_scope 'p'
+   #if p?
+     # TODO generate_implied_end_tags except for p tags
+     # TODO parse_error unless open_els[0].name is 'p'
+     # TODO pop stack until 'p' popped
+
+ # http://www.w3.org/TR/html5/syntax.html#insert-a-character
+ # Insert text node t at the adjusted insertion location. When the node
+ # just before that location is already a text node, t's text is appended
+ # to it instead of inserting a new node.
+ tree_insert_text = (t) ->
+   [parent, index] = adjusted_insertion_location()
+   if index > 0
+     preceding = parent.children[index - 1]
+     if preceding.type is TYPE_TEXT
+       preceding.text += t.text
+       return
+   parent.children.splice index, 0, t
+
+ # 8.2.5.1
+ # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
+ # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
+ #
+ # override_target: optional node to insert into instead of the current node.
+ # Returns a two-element array [parent_node, child_index] describing where a
+ # new child should be spliced in.
+ adjusted_insertion_location = (override_target = null) ->
+   # 1. If there was an override target specified, then let target be the
+   # override target.
+   if override_target?
+     target = override_target
+   else # Otherwise, let target be the current node.
+     target = open_els[0]
+   # Default location: inside target, after its last child (if any). Set
+   # up front so that the not-yet-implemented foster parenting branch below
+   # still yields a valid index instead of undefined.
+   target_i = target.children.length
+   # 2. Determine the adjusted insertion location using the first matching
+   # steps from the following list:
+   #
+   # If foster parenting is enabled and target is a table, tbody, tfoot,
+   # thead, or tr element Foster parenting happens when content is
+   # misnested in tables.
+   if flag_foster_parenting and target.name in foster_parenting_targets
+     console.log "foster parenting isn't implemented yet" # TODO
+     # 1. Let last template be the last template element in the stack of
+     # open elements, if any.
+     # 2. Let last table be the last table element in the stack of open
+     # elements, if any.
+
+     # 3. If there is a last template and either there is no last table,
+     # or there is one, but last template is lower (more recently added)
+     # than last table in the stack of open elements, then: let adjusted
+     # insertion location be inside last template's template contents,
+     # after its last child (if any), and abort these substeps.
+
+     # 4. If there is no last table, then let adjusted insertion
+     # location be inside the first element in the stack of open
+     # elements (the html element), after its last child (if any), and
+     # abort these substeps. (fragment case)
+
+     # 5. If last table has a parent element, then let adjusted
+     # insertion location be inside last table's parent element,
+     # immediately before last table, and abort these substeps.
+
+     # 6. Let previous element be the element immediately above last
+     # table in the stack of open elements.
+
+     # 7. Let adjusted insertion location be inside previous element,
+     # after its last child (if any).
+
+     # Note: These steps are involved in part because it's possible for
+     # elements, the table element in this case in particular, to have
+     # been moved by a script around in the DOM, or indeed removed from
+     # the DOM entirely, after the element was inserted by the parser.
+
+   # 3. If the adjusted insertion location is inside a template element,
+   # let it instead be inside the template element's template contents,
+   # after its last child (if any). TODO
+
+   # 4. Return the adjusted insertion location.
+   return [target, target_i]
+
+ # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
+ # aka create_an_element_for_token
+ #
+ # Builds a TYPE_TAG Node from open-tag token t. Side effects on the token:
+ # its type is changed to TYPE_TAG and its attrs_a array is emptied.
+ # intended_parent is accepted but not used yet.
+ token_to_element = (t, namespace, intended_parent) ->
+   t.type = TYPE_TAG # not TYPE_OPEN_TAG
+   # convert the [name, value] pairs into a hash, consuming them from the
+   # end of the array
+   attrs = {}
+   until t.attrs_a.length is 0
+     [attr_name, attr_value] = t.attrs_a.pop()
+     attrs[attr_name] = attr_value # TODO check what to do with duplicate attrs
+
+   # TODO 2. If the newly created element has an xmlns attribute in the
+   # XMLNS namespace whose value is not exactly the same as the element's
+   # namespace, that is a parse error. Similarly, if the newly created
+   # element has an xmlns:xlink attribute in the XMLNS namespace whose
+   # value is not the XLink Namespace, that is a parse error.
+
+   # fixfull: the spec says stuff about form pointers and ownerDocument
+
+   return new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs
+
+ # FIXME implement the "foster parenting" part
+ # FIXME read spec, do this right
+ # FIXME implement the override target thing
+ # note: this assumes it's an open tag
+ # TODO tree_insert_html_element = (t, ...
+ #
+ # Inserts el at the adjusted insertion location (optionally computed from
+ # override_target), converting it to an element first when it is still a
+ # TYPE_OPEN_TAG token. The inserted element gets its parent set, becomes
+ # the current node (open_els[0]) and is returned.
+ tree_insert_element = (el, override_target = null, namespace = null) ->
+   [parent, index] = adjusted_insertion_location override_target
+   # a TYPE_OPEN_TAG here means we were handed a "token", not an element
+   el = token_to_element el, namespace, parent if el.type is TYPE_OPEN_TAG
+   # fixfull: Document nodes sometimes can't accept more children
+   parent.children.splice index, 0, el
+   el.parent = parent
+   open_els.unshift el
+   el
+
+ # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
+ # Appends comment node t as the last child of the current node.
+ tree_insert_a_comment = (t) ->
+   # FIXME read spec for "adjusted insertion location", etc; this might be wrong
+   current_node = open_els[0]
+   current_node.children.push t
+
+ # 8.2.5.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
+ # "Any other end tag" handling, factored out because adoption agency calls it.
+ # Walks open_els from the current node outward: on the first node named
+ # `name`, that node and everything below it are removed from the stack
+ # (a parse error is reported when it was not the current node). Reaching a
+ # node listed in special_elements first is a parse error and the tag is
+ # ignored.
+ in_body_any_other_end_tag = (name) ->
+   for node, depth in open_els
+     if node.name is name
+       # FIXME generate implied end tags except those with name==name
+       parse_error() if depth isnt 0
+       # drop the `depth` nodes below the match, then the match itself
+       open_els.splice 0, depth + 1
+       return
+     if special_elements[node.name]?
+       parse_error()
+       return
+ # "in body" insertion mode: dispatches token t on its type (text, comment,
+ # doctype, open tag, EOF, end tag) and, for tags, on the tag name.
+ # Many tag names are still TODO and fall through to the generic handlers.
+ tree_in_body = (t) ->
+   switch t.type
+     when TYPE_TEXT
+       switch t.text
+         when "\u0000"
+           parse_error()
+         when "\t", "\u000a", "\u000c", "\u000d", ' '
+           reconstruct_active_formatting_elements()
+           tree_insert_text t
+         else
+           reconstruct_active_formatting_elements()
+           tree_insert_text t
+           flag_frameset_ok = false
+     when TYPE_COMMENT
+       tree_insert_a_comment t
+     when TYPE_DOCTYPE
+       parse_error()
+     when TYPE_OPEN_TAG
+       switch t.name
+         when 'html'
+           parse_error()
+           return if template_tag_is_open()
+           # Copy attributes the root element doesn't have yet. The root is
+           # the last entry of open_els; its attributes live in .attrs
+           # (not .children).
+           root_attrs = open_els[open_els.length - 1].attrs
+           if t.attrs_a?
+             # open-tag tokens carry attributes as [name, value] pairs
+             # (see token_to_element)
+             for a in t.attrs_a
+               root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
+           else
+             for k, v of t.attrs
+               root_attrs[k] = v unless root_attrs[k]?
+         when 'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'
+           # FIXME also do this for </template> (end tag)
+           return tree_in_head t
+         when 'body'
+           parse_error()
+           # TODO
+         when 'frameset'
+           parse_error()
+           # TODO
+         when 'address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'nav', 'ol', 'p', 'section', 'summary', 'ul'
+           close_p_if_in_button_scope()
+           tree_insert_element t
+         when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
+           close_p_if_in_button_scope()
+           if open_els[0].name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
+             parse_error()
+             open_els.shift()
+           tree_insert_element t
+         # TODO lots more to implement here
+         when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u'
+           reconstruct_active_formatting_elements()
+           el = tree_insert_element t
+           # newest entry goes at index 0: both
+           # reconstruct_active_formatting_elements and adoption_agency
+           # treat afe[0] as the most recently added element
+           afe.unshift el
+         # TODO lots more to implement here
+         else # any other start tag
+           reconstruct_active_formatting_elements()
+           tree_insert_element t
+     when TYPE_EOF
+       ok_tags = {
+         dd: true, dt: true, li: true, p: true, tbody: true, td: true,
+         tfoot: true, th: true, thead: true, tr: true, body: true, html: true,
+       }
+       # any open element outside ok_tags is a (single) parse error
+       for open_el in open_els
+         unless ok_tags[open_el.name]?
+           parse_error()
+           break
+       # TODO stack of template insertion modes thing
+       flag_parsing = false # stop parsing
+     when TYPE_END_TAG
+       switch t.name
+         when 'body'
+           unless is_in_scope 'body'
+             parse_error()
+             return
+           # TODO implement parse error and move to tree_after_body
+         when 'html'
+           unless is_in_scope 'body' # weird, but it's what the spec says
+             parse_error()
+             return
+           # TODO implement parse error and move to tree_after_body, reprocess
+         # TODO lots more close tags to implement here
+         when 'a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'
+           adoption_agency t.name
+         # TODO lots more close tags to implement here
+         else
+           in_body_any_other_end_tag t.name
+   return