JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
implement fragment parsing algorithm
author Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 18:49:56 +0000 (13:49 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 18:49:56 +0000 (13:49 -0500)
parse-html.coffee

index 20bc99c..425fe3c 100644 (file)
@@ -817,8 +817,8 @@ parse_html = (args) ->
                loop
                        if node_i is open_els.length - 1
                                last = true
-                               # fixfull (fragment case)
-
+                               if flag_fragment_parsing
+                                       node = context_element
                        # 4. If node is a select element, run these substeps:
                        if node.name is 'select' and node.namespace is NS_HTML
                                # 1. If last is true, jump to the step below labeled done.
@@ -1561,6 +1561,7 @@ parse_html = (args) ->
                if t.type is TYPE_START_TAG and t.name is 'html'
                        el = token_to_element t, NS_HTML, doc
                        doc.children.push el
+                       el.document = doc
                        open_els.unshift(el)
                        # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
                        ins_mode = ins_mode_before_head
@@ -1574,7 +1575,7 @@ parse_html = (args) ->
                # Anything else
                el = token_to_element new_open_tag('html'), NS_HTML, doc
                doc.children.push el
-               el.parent = doc
+               el.document = doc
                open_els.unshift el
                # ?fixfull browsing context
                ins_mode = ins_mode_before_head
@@ -4659,8 +4660,9 @@ parse_html = (args) ->
        # see comments on TYPE_TAG/etc for the structure of this data
        txt = args.html
        cur = 0
-       doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+       doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
        doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
+       fragment_root = null # fragment parsing algorithm returns children of this
        open_els = []
        afe = [] # active formatting elements
        template_ins_modes = []
@@ -4674,21 +4676,92 @@ parse_html = (args) ->
        temporary_buffer = null
        pending_table_character_tokens = []
        head_element_pointer = null
-       flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
-       context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+       flag_fragment_parsing = false
+       context_element = null
        prev_node_id = 0 # just for debugging
 
        # tokenizer initialization
        tok_state = tok_state_data
 
-       # text pre-processing
-       # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
-       txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
-       txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
+       parse_init = ->
+               # fragment parsing (text arg)
+               if args.fragment?
+                       # this handles the fragment from the tests in the format described here:
+                       # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
+                       f = args.fragment
+                       ns = NS_HTML
+                       if f.substr(0, 5) is 'math '
+                               f = f.substr 5
+                               ns = NS_MATHML
+                       else if f.substr(0, 4) is 'svg '
+                               f = f.substr 4
+                               ns = NS_SVG
+                       t = new_open_tag f
+                       context_element = token_to_element t, ns
+                       context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
+                       context_element.document.flag 'quirks mode', QUIRKS_NO
+               # fragment parsing (Node arg)
+               if args.context?
+                       context_element = args.context
+
+               # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+               # fragment parsing algorithm
+               if context_element?
+                       flag_fragment_parsing = true
+                       doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+                       # search up the tree from context, to try to find its document,
+                       # because this file only puts a "document" property on the root
+                       # element.
+                       old_doc = null
+                       el = context_element
+                       loop
+                               if el.document?
+                                       old_doc = el.document
+                                       break
+                               if el.parent
+                                       el = el.parent
+                               else
+                                       break
+                       if old_doc
+                               doc.flag 'quirks mode', old_doc.flag 'quirks mode'
+                       # set tok_state
+                       if context_element.namespace is NS_HTML
+                               switch context_element.name
+                                       when 'title', 'textarea'
+                                               tok_state = tok_state_rcdata
+                                       when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
+                                               tok_state = tok_state_rawtext
+                                       when 'script'
+                                               tok_state = tok_state_script_data
+                                       when 'noscript'
+                                               if flag_scripting
+                                                       tok_state = tok_state_rawtext
+                                       when 'plaintext'
+                                               tok_state = tok_state_plaintext
+                       fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+                       doc.children.push fragment_root
+                       fragment_root.document = doc
+                       open_els = [fragment_root]
+                       if context_element.name is 'template' and context_element.namespace is NS_HTML
+                               template_ins_modes.unshift ins_mode_in_template
+                       # fixfull create token for context (it should have its original one already)
+                       reset_ins_mode()
+                       # set form_element pointer... in the foreign doc?!
+                       el = context_element
+                       loop
+                               if el.name is 'form' and el.namespace is NS_HTML
+                                       form_element_pointer = el
+                                       break
+                               if el.parent
+                                       el = el.parent
+                               else
+                                       break
+
+               # text pre-processing
+               # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+               txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
+               txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-       if args.name is "webkit01.dat #12"
-               console.log "hi"
-       # proccess input
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
        parse_main_loop = ->
                while flag_parsing
@@ -4696,7 +4769,12 @@ parse_html = (args) ->
                        if t?
                                process_token t
                                # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
+               return
+       parse_init()
        parse_main_loop()
+
+       if flag_fragment_parsing
+               return fragment_root.children
        return doc.children
 
 serialize_els = (els, shallow, show_ids) ->