From: Jason Woofenden Date: Thu, 24 Dec 2015 19:06:00 +0000 (-0500) Subject: code cleanup, remove debug logs X-Git-Url: https://jasonwoof.com/gitweb/?p=peach-html5-editor.git;a=commitdiff_plain;h=0c9099629992eca837582bf019c54e9efd8a7f21 code cleanup, remove debug logs --- diff --git a/parse-html.coffee b/parse-html.coffee index 425fe3c..46195f9 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -89,14 +89,18 @@ QUIRKS_NO = 1 QUIRKS_LIMITED = 2 QUIRKS_YES = 3 +# queue up debug logs, so eg they can be shown only for tests that fail g_debug_log = [] debug_log_reset = -> g_debug_log = [] + return debug_log = (str) -> g_debug_log.push str + return debug_log_each = (cb) -> for str in g_debug_log cb str + return prev_node_id = 0 class Node @@ -120,55 +124,13 @@ class Node @token.flag 'did_self_close', true else @flag 'did_self_close', true + return flag: (key, value = null) -> if value? @flags[key] = value else return @flags[key] - serialize: (shallow = false, show_ids = false) -> # for unit tests - ret = '' - switch @type - when TYPE_TAG - ret += 'tag:' - ret += JSON.stringify @name - ret += ',' - if show_ids - ret += "##{@id}," - if shallow - break - attr_keys = [] - for k of @attrs - attr_keys.push k - attr_keys.sort() - ret += '{' - sep = '' - for k in attr_keys - ret += sep - sep = ',' - ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}" - ret += '},[' - sep = '' - for c in @children - ret += sep - sep = ',' - ret += c.serialize shallow, show_ids - ret += ']' - when TYPE_TEXT - ret += 'text:' - ret += JSON.stringify @text - when TYPE_COMMENT - ret += 'comment:' - ret += JSON.stringify @text - when TYPE_DOCTYPE - ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}" - when TYPE_AFE_MARKER - ret += 'marker' - when TYPE_AAA_BOOKMARK - ret += 'aaa_bookmark' - else - ret += 'unknown:' - console.log "unknown: #{JSON.stringify @}" # backtrace is just as well - return ret + return # helpers: (only take args that are normally known when parser creates nodes) new_open_tag = (name) -> @@ -661,12 +623,14 @@ parse_html = (args) -> stop_parsing = -> flag_parsing = false + return parse_error = -> if args.error_cb? args.error_cb cur else console.log "Parse error at character #{cur} of #{txt.length}" + return # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements # "Noah's Ark clause" but with three @@ -692,8 +656,11 @@ parse_html = (args) -> afe.splice i, 1 break afe.unshift new_el + return + afe_push_marker = -> afe.unshift new_afe_marker() + return # the functions below impliment the Tree Contstruction algorithm # http://www.w3.org/TR/html5/syntax.html#tree-construction @@ -927,6 +894,7 @@ parse_html = (args) -> node_i += 1 node = open_els[node_i] # 19. Return to the step labeled loop. + return # 8.2.3.2 @@ -958,6 +926,7 @@ parse_html = (args) -> afe[i] = el break if i is 0 i -= 1 # Advance + return # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm # adoption agency algorithm @@ -966,10 +935,6 @@ parse_html = (args) -> # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p # http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements adoption_agency = (subject) -> - debug_log "adoption_agency()" - debug_log "tree: #{serialize_els doc.children, false, true}" - debug_log "open_els: #{serialize_els open_els, true, true}" - debug_log "afe: #{serialize_els afe, true, true}" # this block implements tha W3C spec # # 1. If the current node is an HTML element whose tag name is subject, # # then run these substeps: @@ -989,7 +954,6 @@ parse_html = (args) -> # if t is el # afe.splice i, 1 # break -# debug_log "aaa: starting off with subject on top of stack, exiting" # return # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm # If the current node is an HTML element whose tag name is subject, and @@ -997,7 +961,6 @@ parse_html = (args) -> # then pop the current node off the stack of open elements, and abort # these steps. if open_els[0].name is subject and open_els[0].namespace is NS_HTML - debug_log "aaa: starting off with subject on top of stack, exiting" # remove it from the list of active formatting elements (if found) in_afe = false for el, i in afe @@ -1005,7 +968,6 @@ parse_html = (args) -> in_afe = true break unless in_afe - debug_log "aaa: ...and not in afe, aaa done" open_els.shift() return # fall through @@ -1029,7 +991,6 @@ parse_html = (args) -> # If there is no such element, then abort these steps and instead # act as described in the "any other end tag" entry above. if fe is null - debug_log "aaa: fe not found in afe" in_body_any_other_end_tag subject return # 6. If formatting element is not in the stack of open elements, @@ -1041,7 +1002,6 @@ parse_html = (args) -> in_open_els = true break unless in_open_els - debug_log "aaa: fe not found in open_els" parse_error() # "remove it from the list" must mean afe, since it's not in open_els afe.splice fe_of_afe, 1 @@ -1050,7 +1010,6 @@ parse_html = (args) -> # the element is not in scope, then this is a parse error; abort # these steps. unless el_is_in_scope fe - debug_log "aaa: fe not in scope" parse_error() return # 8. If formatting element is not the current node, this is a parse @@ -1076,7 +1035,6 @@ parse_html = (args) -> # formatting element from the list of active formatting elements, # and finally abort these steps. if fb is null - debug_log "aaa: no fb" loop t = open_els.shift() if t is fe @@ -1108,21 +1066,12 @@ parse_html = (args) -> node_next = open_els[i + 1] break node = node_next ? node_above - debug_log "inner loop #{inner}" - debug_log "tree: #{serialize_els doc.children, false, true}" - debug_log "open_els: #{serialize_els open_els, true, true}" - debug_log "afe: #{serialize_els afe, true, true}" - debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" - debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" - debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" - debug_log "node: #{node.serialize true, true}" # TODO make sure node_above gets re-set if/when node is removed from open_els # 4. If node is formatting element, then go to the next step in # the overall algorithm. if node is fe break - debug_log "the meat" # 5. If inner loop counter is greater than three and node is in # the list of active formatting elements, then remove node from # the list of active formatting elements. @@ -1131,23 +1080,19 @@ parse_html = (args) -> if t is node if inner > 3 afe.splice i, 1 - debug_log "max out inner" else node_in_afe = true - debug_log "in afe" break # 6. If node is not in the list of active formatting elements, # then remove node from the stack of open elements and then go # back to the step labeled inner loop. unless node_in_afe - debug_log "not in afe" for t, i in open_els if t is node node_above = open_els[i + 1] open_els.splice i, 1 break continue - debug_log "the bones" # 7. create an element for the token for which the element node # was created, in the HTML namespace, with common ancestor as # the intended parent; replace the entry for node in the list @@ -1159,13 +1104,11 @@ parse_html = (args) -> for t, i in afe if t is node afe[i] = new_node - debug_log "replaced in afe" break for t, i in open_els if t is node node_above = open_els[i + 1] open_els[i] = new_node - debug_log "replaced in open_els" break node = new_node # 8. If last node is furthest block, then move the @@ -1175,29 +1118,23 @@ parse_html = (args) -> for t, i in afe if t is bookmark afe.splice i, 1 - debug_log "removed bookmark" break for t, i in afe if t is node # "after" means lower afe.splice i, 0, bookmark # "after as <- - debug_log "placed bookmark after node" - debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}" break # 9. Insert last node into node, first removing it from its # previous parent node if any. if last_node.parent? - debug_log "last_node has parent" for c, i in last_node.parent.children if c is last_node - debug_log "removing last_node from parent" last_node.parent.children.splice i, 1 break node.children.push last_node last_node.parent = node # 10. Let last node be node. last_node = node - debug_log "at last" # 11. Return to the step labeled inner loop. # 14. Insert whatever last node ended up being in the previous step # at the appropriate place for inserting a node, but using common @@ -1208,36 +1145,15 @@ parse_html = (args) -> # * last_node is fb # * last_node is still in the tree (not a duplicate) if last_node.parent? - debug_log "FEFIRST? last_node has parent" for c, i in last_node.parent.children if c is last_node - debug_log "removing last_node from parent" last_node.parent.children.splice i, 1 break - - debug_log "after aaa inner loop" - debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" - debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" - debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" - debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}" - debug_log "tree: #{serialize_els doc.children, false, true}" - - debug_log "insert" - - # can't use standard insert token thing, because it's already in # open_els and must stay at it's current position in open_els dest = adjusted_insertion_location ca dest[0].children.splice dest[1], 0, last_node last_node.parent = dest[0] - - - debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" - debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" - debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" - debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}" - debug_log "tree: #{serialize_els doc.children, false, true}" - # 15. Create an element for the token for which formatting element # was created, in the HTML namespace, with furthest block as the # intended parent. @@ -1275,11 +1191,7 @@ parse_html = (args) -> open_els.splice i, 0, new_element break # 20. Jump back to the step labeled outer loop. - debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}" - debug_log "tree: #{serialize_els doc.children, false, true}" - debug_log "open_els: #{serialize_els open_els, true, true}" - debug_log "afe: #{serialize_els afe, true, true}" - debug_log "AAA DONE" + return # http://www.w3.org/TR/html5/syntax.html#close-a-p-element close_p_element = -> @@ -1290,9 +1202,11 @@ parse_html = (args) -> el = open_els.shift() if el.name is 'p' and el.namespace is NS_HTML return + return close_p_if_in_button_scope = -> if is_in_button_scope 'p', NS_HTML close_p_element() + return # http://www.w3.org/TR/html5/syntax.html#insert-a-character # aka insert_a_character = (t) -> @@ -1305,7 +1219,7 @@ parse_html = (args) -> prev.text += t.text return dest[0].children.splice dest[1], 0, t - + return # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction process_token = (t) -> @@ -1461,13 +1375,14 @@ parse_html = (args) -> return el # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element insert_html_element = (token) -> - insert_foreign_element token, NS_HTML + return insert_foreign_element token, NS_HTML # http://www.w3.org/TR/html5/syntax.html#insert-a-comment # position should be [node, index_within_children] insert_comment = (t, position = null) -> position ?= adjusted_insertion_location() position[0].children.splice position[1], 0, t + return # 8.2.5.2 # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm @@ -1476,17 +1391,20 @@ parse_html = (args) -> tok_state = tok_state_rawtext original_ins_mode = ins_mode ins_mode = ins_mode_text + return parse_generic_rcdata_text = (t) -> insert_html_element t tok_state = tok_state_rcdata original_ins_mode = ins_mode ins_mode = ins_mode_text + return # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags generate_implied_end_tags = (except = null) -> while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except open_els.shift() + return # 8.2.5.4 The rules for parsing tokens in HTML content # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml @@ -1611,12 +1529,14 @@ parse_html = (args) -> head_element_pointer = el ins_mode = ins_mode_in_head process_token t + return # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control open_els.shift() # spec says this will be a 'head' node ins_mode = ins_mode_after_head process_token t + return ins_mode_in_head = (t) -> if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ') insert_character t @@ -1695,6 +1615,7 @@ parse_html = (args) -> parse_error() return ins_mode_in_head_else t + return # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript ins_mode_in_head_noscript_else = (t) -> @@ -1702,6 +1623,7 @@ parse_html = (args) -> open_els.shift() ins_mode = ins_mode_in_head process_token t + return ins_mode_in_head_noscript = (t) -> if t.type is TYPE_DOCTYPE parse_error() @@ -1726,8 +1648,6 @@ parse_html = (args) -> ins_mode_in_head_noscript_else t return - - # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode ins_mode_after_head_else = (t) -> body_tok = new_open_tag 'body' @@ -1765,7 +1685,6 @@ parse_html = (args) -> if el is head_element_pointer open_els.splice i, 1 return - console.log "warning: 23904 couldn't find head element in open_els" return if t.type is TYPE_END_TAG and t.name is 'template' ins_mode_in_head t @@ -1778,6 +1697,7 @@ parse_html = (args) -> return # Anything else ins_mode_after_head_else t + return # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it @@ -2355,7 +2275,7 @@ parse_html = (args) -> open_els.shift() ins_mode = original_ins_mode return - console.log 'warning: end of ins_mode_text reached' + return # the functions below implement the tokenizer stats described here: # http://www.w3.org/TR/html5/syntax.html#tokenization @@ -2456,6 +2376,7 @@ parse_html = (args) -> ins_mode_in_body t else ins_mode_in_table_else t + return # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext @@ -2482,6 +2403,7 @@ parse_html = (args) -> pending_table_character_tokens = [] ins_mode = original_ins_mode process_token t + return # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption ins_mode_in_caption = (t) -> @@ -2517,6 +2439,7 @@ parse_html = (args) -> return # Anything else ins_mode_in_body t + return # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup ins_mode_in_column_group = (t) -> @@ -2605,6 +2528,7 @@ parse_html = (args) -> return # Anything else ins_mode_in_table t + return # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr ins_mode_in_row = (t) -> @@ -2646,6 +2570,7 @@ parse_html = (args) -> return # Anything else ins_mode_in_table t + return # http://www.w3.org/TR/html5/syntax.html#close-the-cell close_the_cell = -> @@ -2658,6 +2583,7 @@ parse_html = (args) -> break clear_afe_to_marker() ins_mode = ins_mode_in_row + return # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd ins_mode_in_cell = (t) -> @@ -2701,6 +2627,7 @@ parse_html = (args) -> return # Anything Else ins_mode_in_body t + return # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect ins_mode_in_select = (t) -> @@ -2867,6 +2794,7 @@ parse_html = (args) -> template_ins_modes.shift() reset_ins_mode() process_token t + return # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody ins_mode_after_body = (t) -> @@ -2896,6 +2824,7 @@ parse_html = (args) -> parse_error() ins_mode = ins_mode_in_body process_token t + return # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset ins_mode_in_frameset = (t) -> @@ -3083,6 +3012,7 @@ parse_html = (args) -> if node.namespace is NS_HTML break ins_mode t # explicitly call HTML insertion mode + return # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state @@ -3276,12 +3206,8 @@ parse_html = (args) -> # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token is_appropriate_end_tag = (t) -> - # spec says to check against "the tag name of the last start tag to - # have been emitted from this tokenizer", but this is only called from - # the various "raw" states, so it's hopefully ok to assume that - # open_els[0].name will work instead TODO: verify this after the script - # data states are implemented - debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}" + # fixfull: this assumes that open_els[0].name is "the tag name of the last + # start tag to have been emitted from this tokenizer" return t.type is TYPE_END_TAG and t.name is open_els[0].name # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state @@ -3812,6 +3738,7 @@ parse_html = (args) -> # Anything else tok_cur_tag.attrs_a.unshift [c, ''] tok_state = tok_state_attribute_name + return # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state tok_state_before_attribute_value = -> @@ -4762,6 +4689,8 @@ parse_html = (args) -> txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this + return + # http://www.w3.org/TR/html5/syntax.html#tree-construction parse_main_loop = -> while flag_parsing @@ -4777,15 +4706,6 @@ parse_html = (args) -> return fragment_root.children return doc.children -serialize_els = (els, shallow, show_ids) -> - serialized = '' - sep = '' - for t in els - serialized += sep - sep = ',' - serialized += t.serialize shallow, show_ids - return serialized - module.exports.parse_html = parse_html module.exports.debug_log_reset = debug_log_reset module.exports.debug_log_each = debug_log_each