JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
code cleanup, remove debug logs
author Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 19:06:00 +0000 (14:06 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 19:06:00 +0000 (14:06 -0500)
parse-html.coffee

index 425fe3c..46195f9 100644 (file)
@@ -89,14 +89,18 @@ QUIRKS_NO = 1
 QUIRKS_LIMITED = 2
 QUIRKS_YES = 3
 
+# queue up debug logs, so eg they can be shown only for tests that fail
 g_debug_log = []
 debug_log_reset = ->
        g_debug_log = []
+       return
 debug_log = (str) ->
        g_debug_log.push str
+       return
 debug_log_each = (cb) ->
        for str in g_debug_log
                cb str
+       return
 
 prev_node_id = 0
 class Node
@@ -120,55 +124,13 @@ class Node
                        @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true
+               return
        flag: (key, value = null) ->
                if value?
                        @flags[key] = value
                else
                        return @flags[key]
-       serialize: (shallow = false, show_ids = false) -> # for unit tests
-               ret = ''
-               switch @type
-                       when TYPE_TAG
-                               ret += 'tag:'
-                               ret += JSON.stringify @name
-                               ret += ','
-                               if show_ids
-                                       ret += "##{@id},"
-                               if shallow
-                                       break
-                               attr_keys = []
-                               for k of @attrs
-                                       attr_keys.push k
-                               attr_keys.sort()
-                               ret += '{'
-                               sep = ''
-                               for k in attr_keys
-                                       ret += sep
-                                       sep = ','
-                                       ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
-                               ret += '},['
-                               sep = ''
-                               for c in @children
-                                       ret += sep
-                                       sep = ','
-                                       ret += c.serialize shallow, show_ids
-                               ret += ']'
-                       when TYPE_TEXT
-                               ret += 'text:'
-                               ret += JSON.stringify @text
-                       when TYPE_COMMENT
-                               ret += 'comment:'
-                               ret += JSON.stringify @text
-                       when TYPE_DOCTYPE
-                               ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
-                       when TYPE_AFE_MARKER
-                               ret += 'marker'
-                       when TYPE_AAA_BOOKMARK
-                               ret += 'aaa_bookmark'
-                       else
-                               ret += 'unknown:'
-                               console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
-               return ret
+               return
 
 # helpers: (only take args that are normally known when parser creates nodes)
 new_open_tag = (name) ->
@@ -661,12 +623,14 @@ parse_html = (args) ->
 
        stop_parsing = ->
                flag_parsing = false
+               return
 
        parse_error = ->
                if args.error_cb?
                        args.error_cb cur
                else
                        console.log "Parse error at character #{cur} of #{txt.length}"
+               return
 
        # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
        # "Noah's Ark clause" but with three
@@ -692,8 +656,11 @@ parse_html = (args) ->
                                                afe.splice i, 1
                                                break
                afe.unshift new_el
+               return
+
        afe_push_marker = ->
                afe.unshift new_afe_marker()
+               return
 
        # the functions below implement the Tree Construction algorithm
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
@@ -927,6 +894,7 @@ parse_html = (args) ->
                        node_i += 1
                        node = open_els[node_i]
                        # 19. Return to the step labeled loop.
+               return
 
        # 8.2.3.2
 
@@ -958,6 +926,7 @@ parse_html = (args) ->
                        afe[i] = el
                        break if i is 0
                        i -= 1 # Advance
+               return
 
        # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
        # adoption agency algorithm
@@ -966,10 +935,6 @@ parse_html = (args) ->
        #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
        #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
        adoption_agency = (subject) ->
-               debug_log "adoption_agency()"
-               debug_log "tree: #{serialize_els doc.children, false, true}"
-               debug_log "open_els: #{serialize_els open_els, true, true}"
-               debug_log "afe: #{serialize_els afe, true, true}"
 # this block implements the W3C spec
 #              # 1. If the current node is an HTML element whose tag name is subject,
 #              # then run these substeps:
@@ -989,7 +954,6 @@ parse_html = (args) ->
 #                              if t is el
 #                                      afe.splice i, 1
 #                                      break
-#                      debug_log "aaa: starting off with subject on top of stack, exiting"
 #                      return
 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
                # If the current node is an HTML element whose tag name is subject, and
@@ -997,7 +961,6 @@ parse_html = (args) ->
                # then pop the current node off the stack of open elements, and abort
                # these steps.
                if open_els[0].name is subject and open_els[0].namespace is NS_HTML
-                       debug_log "aaa: starting off with subject on top of stack, exiting"
                        # remove it from the list of active formatting elements (if found)
                        in_afe = false
                        for el, i in afe
@@ -1005,7 +968,6 @@ parse_html = (args) ->
                                        in_afe = true
                                        break
                        unless in_afe
-                               debug_log "aaa: ...and not in afe, aaa done"
                                open_els.shift()
                                return
                        # fall through
@@ -1029,7 +991,6 @@ parse_html = (args) ->
                        # If there is no such element, then abort these steps and instead
                        # act as described in the "any other end tag" entry above.
                        if fe is null
-                               debug_log "aaa: fe not found in afe"
                                in_body_any_other_end_tag subject
                                return
                        # 6. If formatting element is not in the stack of open elements,
@@ -1041,7 +1002,6 @@ parse_html = (args) ->
                                        in_open_els = true
                                        break
                        unless in_open_els
-                               debug_log "aaa: fe not found in open_els"
                                parse_error()
                                # "remove it from the list" must mean afe, since it's not in open_els
                                afe.splice fe_of_afe, 1
@@ -1050,7 +1010,6 @@ parse_html = (args) ->
                        # the element is not in scope, then this is a parse error; abort
                        # these steps.
                        unless el_is_in_scope fe
-                               debug_log "aaa: fe not in scope"
                                parse_error()
                                return
                        # 8. If formatting element is not the current node, this is a parse
@@ -1076,7 +1035,6 @@ parse_html = (args) ->
                        # formatting element from the list of active formatting elements,
                        # and finally abort these steps.
                        if fb is null
-                               debug_log "aaa: no fb"
                                loop
                                        t = open_els.shift()
                                        if t is fe
@@ -1108,21 +1066,12 @@ parse_html = (args) ->
                                                node_next = open_els[i + 1]
                                                break
                                node = node_next ? node_above
-                               debug_log "inner loop #{inner}"
-                               debug_log "tree: #{serialize_els doc.children, false, true}"
-                               debug_log "open_els: #{serialize_els open_els, true, true}"
-                               debug_log "afe: #{serialize_els afe, true, true}"
-                               debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
-                               debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
-                               debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
-                               debug_log "node: #{node.serialize true, true}"
                                # TODO make sure node_above gets re-set if/when node is removed from open_els
 
                                # 4. If node is formatting element, then go to the next step in
                                # the overall algorithm.
                                if node is fe
                                        break
-                               debug_log "the meat"
                                # 5. If inner loop counter is greater than three and node is in
                                # the list of active formatting elements, then remove node from
                                # the list of active formatting elements.
@@ -1131,23 +1080,19 @@ parse_html = (args) ->
                                        if t is node
                                                if inner > 3
                                                        afe.splice i, 1
-                                                       debug_log "max out inner"
                                                else
                                                        node_in_afe = true
-                                                       debug_log "in afe"
                                                break
                                # 6. If node is not in the list of active formatting elements,
                                # then remove node from the stack of open elements and then go
                                # back to the step labeled inner loop.
                                unless node_in_afe
-                                       debug_log "not in afe"
                                        for t, i in open_els
                                                if t is node
                                                        node_above = open_els[i + 1]
                                                        open_els.splice i, 1
                                                        break
                                        continue
-                               debug_log "the bones"
                                # 7. create an element for the token for which the element node
                                # was created, in the HTML namespace, with common ancestor as
                                # the intended parent; replace the entry for node in the list
@@ -1159,13 +1104,11 @@ parse_html = (args) ->
                                for t, i in afe
                                        if t is node
                                                afe[i] = new_node
-                                               debug_log "replaced in afe"
                                                break
                                for t, i in open_els
                                        if t is node
                                                node_above = open_els[i + 1]
                                                open_els[i] = new_node
-                                               debug_log "replaced in open_els"
                                                break
                                node = new_node
                                # 8. If last node is furthest block, then move the
@@ -1175,29 +1118,23 @@ parse_html = (args) ->
                                        for t, i in afe
                                                if t is bookmark
                                                        afe.splice i, 1
-                                                       debug_log "removed bookmark"
                                                        break
                                        for t, i in afe
                                                if t is node
                                                        # "after" means lower
                                                        afe.splice i, 0, bookmark # "after as <-
-                                                       debug_log "placed bookmark after node"
-                                                       debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
                                                        break
                                # 9. Insert last node into node, first removing it from its
                                # previous parent node if any.
                                if last_node.parent?
-                                       debug_log "last_node has parent"
                                        for c, i in last_node.parent.children
                                                if c is last_node
-                                                       debug_log "removing last_node from parent"
                                                        last_node.parent.children.splice i, 1
                                                        break
                                node.children.push last_node
                                last_node.parent = node
                                # 10. Let last node be node.
                                last_node = node
-                               debug_log "at last"
                                # 11. Return to the step labeled inner loop.
                        # 14. Insert whatever last node ended up being in the previous step
                        # at the appropriate place for inserting a node, but using common
@@ -1208,36 +1145,15 @@ parse_html = (args) ->
                        #   * last_node is fb
                        #   * last_node is still in the tree (not a duplicate)
                        if last_node.parent?
-                               debug_log "FEFIRST? last_node has parent"
                                for c, i in last_node.parent.children
                                        if c is last_node
-                                               debug_log "removing last_node from parent"
                                                last_node.parent.children.splice i, 1
                                                break
-
-                       debug_log "after aaa inner loop"
-                       debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
-                       debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
-                       debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
-                       debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
-                       debug_log "tree: #{serialize_els doc.children, false, true}"
-
-                       debug_log "insert"
-
-
                        # can't use standard insert token thing, because it's already in
                        # open_els and must stay at its current position in open_els
                        dest = adjusted_insertion_location ca
                        dest[0].children.splice dest[1], 0, last_node
                        last_node.parent = dest[0]
-
-
-                       debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
-                       debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
-                       debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
-                       debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
-                       debug_log "tree: #{serialize_els doc.children, false, true}"
-
                        # 15. Create an element for the token for which formatting element
                        # was created, in the HTML namespace, with furthest block as the
                        # intended parent.
@@ -1275,11 +1191,7 @@ parse_html = (args) ->
                                        open_els.splice i, 0, new_element
                                        break
                        # 20. Jump back to the step labeled outer loop.
-                       debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
-                       debug_log "tree: #{serialize_els doc.children, false, true}"
-                       debug_log "open_els: #{serialize_els open_els, true, true}"
-                       debug_log "afe: #{serialize_els afe, true, true}"
-               debug_log "AAA DONE"
+               return
 
        # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
        close_p_element = ->
@@ -1290,9 +1202,11 @@ parse_html = (args) ->
                        el = open_els.shift()
                        if el.name is 'p' and el.namespace is NS_HTML
                                return
+               return
        close_p_if_in_button_scope = ->
                if is_in_button_scope 'p', NS_HTML
                        close_p_element()
+               return
 
        # http://www.w3.org/TR/html5/syntax.html#insert-a-character
        # aka insert_a_character = (t) ->
@@ -1305,7 +1219,7 @@ parse_html = (args) ->
                                prev.text += t.text
                                return
                dest[0].children.splice dest[1], 0, t
-
+               return
 
        # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
        process_token = (t) ->
@@ -1461,13 +1375,14 @@ parse_html = (args) ->
                return el
        # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
        insert_html_element = (token) ->
-               insert_foreign_element token, NS_HTML
+               return insert_foreign_element token, NS_HTML
 
        # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
        # position should be [node, index_within_children]
        insert_comment = (t, position = null) ->
                position ?= adjusted_insertion_location()
                position[0].children.splice position[1], 0, t
+               return
 
        # 8.2.5.2
        # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
@@ -1476,17 +1391,20 @@ parse_html = (args) ->
                tok_state = tok_state_rawtext
                original_ins_mode = ins_mode
                ins_mode = ins_mode_text
+               return
        parse_generic_rcdata_text = (t) ->
                insert_html_element t
                tok_state = tok_state_rcdata
                original_ins_mode = ins_mode
                ins_mode = ins_mode_text
+               return
 
        # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
        # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
        generate_implied_end_tags = (except = null) ->
                while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
                        open_els.shift()
+               return
 
        # 8.2.5.4 The rules for parsing tokens in HTML content
        # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
@@ -1611,12 +1529,14 @@ parse_html = (args) ->
                head_element_pointer = el
                ins_mode = ins_mode_in_head
                process_token t
+               return
 
        # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
        ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
                open_els.shift() # spec says this will be a 'head' node
                ins_mode = ins_mode_after_head
                process_token t
+               return
        ins_mode_in_head = (t) ->
                if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
                        insert_character t
@@ -1695,6 +1615,7 @@ parse_html = (args) ->
                        parse_error()
                        return
                ins_mode_in_head_else t
+               return
 
        # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
        ins_mode_in_head_noscript_else = (t) ->
@@ -1702,6 +1623,7 @@ parse_html = (args) ->
                open_els.shift()
                ins_mode = ins_mode_in_head
                process_token t
+               return
        ins_mode_in_head_noscript = (t) ->
                if t.type is TYPE_DOCTYPE
                        parse_error()
@@ -1726,8 +1648,6 @@ parse_html = (args) ->
                ins_mode_in_head_noscript_else t
                return
 
-
-
        # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
        ins_mode_after_head_else = (t) ->
                body_tok = new_open_tag 'body'
@@ -1765,7 +1685,6 @@ parse_html = (args) ->
                                if el is head_element_pointer
                                        open_els.splice i, 1
                                        return
-                       console.log "warning: 23904 couldn't find head element in open_els"
                        return
                if t.type is TYPE_END_TAG and t.name is 'template'
                        ins_mode_in_head t
@@ -1778,6 +1697,7 @@ parse_html = (args) ->
                        return
                # Anything else
                ins_mode_after_head_else t
+               return
 
        # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
        in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
@@ -2355,7 +2275,7 @@ parse_html = (args) ->
                        open_els.shift()
                        ins_mode = original_ins_mode
                        return
-               console.log 'warning: end of ins_mode_text reached'
+               return
 
        # the functions below implement the tokenizer states described here:
        # http://www.w3.org/TR/html5/syntax.html#tokenization
@@ -2456,6 +2376,7 @@ parse_html = (args) ->
                                ins_mode_in_body t
                        else
                                ins_mode_in_table_else t
+               return
 
 
        # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
@@ -2482,6 +2403,7 @@ parse_html = (args) ->
                pending_table_character_tokens = []
                ins_mode = original_ins_mode
                process_token t
+               return
 
        # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
        ins_mode_in_caption = (t) ->
@@ -2517,6 +2439,7 @@ parse_html = (args) ->
                        return
                # Anything else
                ins_mode_in_body t
+               return
 
        # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
        ins_mode_in_column_group = (t) ->
@@ -2605,6 +2528,7 @@ parse_html = (args) ->
                        return
                # Anything else
                ins_mode_in_table t
+               return
 
        # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
        ins_mode_in_row = (t) ->
@@ -2646,6 +2570,7 @@ parse_html = (args) ->
                        return
                # Anything else
                ins_mode_in_table t
+               return
 
        # http://www.w3.org/TR/html5/syntax.html#close-the-cell
        close_the_cell = ->
@@ -2658,6 +2583,7 @@ parse_html = (args) ->
                                break
                clear_afe_to_marker()
                ins_mode = ins_mode_in_row
+               return
 
        # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
        ins_mode_in_cell = (t) ->
@@ -2701,6 +2627,7 @@ parse_html = (args) ->
                        return
                # Anything Else
                ins_mode_in_body t
+               return
 
        # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
        ins_mode_in_select = (t) ->
@@ -2867,6 +2794,7 @@ parse_html = (args) ->
                        template_ins_modes.shift()
                        reset_ins_mode()
                        process_token t
+               return
 
        # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
        ins_mode_after_body = (t) ->
@@ -2896,6 +2824,7 @@ parse_html = (args) ->
                parse_error()
                ins_mode = ins_mode_in_body
                process_token t
+               return
 
        # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
        ins_mode_in_frameset = (t) ->
@@ -3083,6 +3012,7 @@ parse_html = (args) ->
                                if node.namespace is NS_HTML
                                        break
                        ins_mode t # explicitly call HTML insertion mode
+               return
 
 
        # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
@@ -3276,12 +3206,8 @@ parse_html = (args) ->
 
        # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
        is_appropriate_end_tag = (t) ->
-               # spec says to check against "the tag name of the last start tag to
-               # have been emitted from this tokenizer", but this is only called from
-               # the various "raw" states, so it's hopefully ok to assume that
-               # open_els[0].name will work instead TODO: verify this after the script
-               # data states are implemented
-               debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
+               # fixfull: this assumes that open_els[0].name is "the tag name of the last
+               # start tag to have been emitted from this tokenizer"
                return t.type is TYPE_END_TAG and t.name is open_els[0].name
 
        # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
@@ -3812,6 +3738,7 @@ parse_html = (args) ->
                # Anything else
                tok_cur_tag.attrs_a.unshift [c, '']
                tok_state = tok_state_attribute_name
+               return
 
        # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
        tok_state_before_attribute_value = ->
@@ -4762,6 +4689,8 @@ parse_html = (args) ->
                txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
                txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
+               return
+
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
        parse_main_loop = ->
                while flag_parsing
@@ -4777,15 +4706,6 @@ parse_html = (args) ->
                return fragment_root.children
        return doc.children
 
-serialize_els = (els, shallow, show_ids) ->
-       serialized = ''
-       sep = ''
-       for t in els
-               serialized += sep
-               sep = ','
-               serialized += t.serialize shallow, show_ids
-       return serialized
-
 module.exports.parse_html = parse_html
 module.exports.debug_log_reset = debug_log_reset
 module.exports.debug_log_each = debug_log_each