From dc04008f5d598663a2bf763f169cf30594a5c4b5 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Fri, 18 Dec 2015 00:25:21 -0500 Subject: [PATCH] implemented adoption agency algorithm --- parse-html.coffee | 422 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 344 insertions(+), 78 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index 6db8ab0..f0f5e91 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -22,8 +22,32 @@ # indicate places where additional code is needed for full HTML document # parsing. # -# Instead, the data structure produced by this parser is an array of nodes. +# Instead, the data structure produced by this parser is an array of Nodes. + + +# stacks/lists +# +# the spec uses a many different words do indicate which ends of lists/stacks +# they are talking about (and relative movement within the lists/stacks). This +# section splains. I'm implementing "lists" (afe and open_els) the same way +# (both as stacks) +# +# stacks grow downward (current element is index=0) # +# example: open_els = [a, b, c, d, e, f, g] +# +# "grows downwards" means it's visualized like this: (index: el, names) +# +# 6: g "start of the list", "topmost" +# 5: f +# 4: e "previous" (to d), "above" +# 3: d (previous/next are relative to this element) +# 2: c "next", "after", "lower", "below" +# 1: b +# 0: a "end of the list", "current node", "bottommost", "last" + + + # Each node is an obect of the Node class. Here are the Node types: TYPE_TAG = 0 # name, {attributes}, [children] TYPE_TEXT = 1 # "text" @@ -41,6 +65,16 @@ NS_HTML = 1 NS_MATHML = 2 NS_SVG = 3 +g_debug_log = [] +debug_log_reset = -> + g_debug_log = [] +debug_log = (str) -> + g_debug_log.push str +debug_log_each = (cb) -> + for str in g_debug_log + cb str + +prev_node_id = 0 class Node constructor: (type, args = {}) -> @type = type # one of the TYPE_* constants above @@ -51,25 +85,33 @@ class Node @children = args.children ? [] @namespace = args.namespace ? NS_HTML @parent = args.parent ? null + if args.id? + @id = "#{args.id}+" + else + @id = "#{++prev_node_id}" shallow_clone: -> # return a new node that's the same except without the children or parent # WARNING this doesn't work right on open tags that are still being parsed attrs = {} attrs[k] = v for k, v of @attrs - return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace - serialize: -> # for unit tests + return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id + serialize: (shallow = false, show_ids = false) -> # for unit tests ret = '' switch @type when TYPE_TAG ret += 'tag:' ret += JSON.stringify @name ret += ',' + if show_ids + ret += "##{@id}," + if shallow + break ret += JSON.stringify @attrs ret += ',[' sep = '' for c in @children ret += sep sep = ',' - ret += c.serialize() + ret += c.serialize shallow, show_ids ret += ']' when TYPE_TEXT ret += 'text:' @@ -80,6 +122,10 @@ class Node when TYPE_DOCTYPE ret += 'doctype' # FIXME + when TYPE_MARKER + ret += 'marker' + when TYPE_AAA_BOOKMARK + ret += 'aaa_bookmark' else ret += 'unknown:' console.log "unknown: #{JSON.stringify @}" # backtrace is just as well @@ -90,6 +136,8 @@ new_open_tag = (name) -> return new Node TYPE_OPEN_TAG, name: name new_end_tag = (name) -> return new Node TYPE_END_TAG, name: name +new_element = (name) -> + return new Node TYPE_TAG, name: name new_text_node = (txt) -> return new Node TYPE_TEXT, text: txt new_comment_node = (txt) -> @@ -228,8 +276,24 @@ formatting_elements = { u: true } +# all html I presume +end_tag_implied = { + dd: true + dt: true + li: true + option: true + optgroup: true + p: true + rb: true + rp: true + rt: true + rtc: true +} + el_is_special = (e) -> - return special_elements[e] is e.namespace + return special_elements[e.name]? + # FIXME it should really be: + #return special_elements[e.name] is e.namespace # decode_named_char_ref() # @@ -279,45 +343,49 @@ parse_html = (txt, parse_error_cb = null) -> # But first... the helpers template_tag_is_open = -> for t in open_els - if t.type is TYPE_TAG and t.name is 'template' + if t.name is 'template' # maybe should also check: and t.namespace is 'html' return true return false - is_in_scope_x = (tag_name, scope) -> + is_in_scope_x = (tag_name, scope, namespace) -> for t in open_els - if t.name is tag_name + if t.name is tag_name and (namespace is null or namespace is t.namespace) return true - if t.name of scope + if scope[t.name] is t.namespace return false return false - is_in_scope_x_y = (tag_name, scope, scope2) -> + is_in_scope_x_y = (tag_name, scope, scope2, namespace) -> for t in open_els - if t.name is tag_name + if t.name is tag_name and (namespace is null or namespace is t.namespace) return true - if t.name of scope + if scope[t.name] is t.namespace return false - if t.name of scope2 + if scope2[t.name] is t.namespace return false return false standard_scopers = { # FIXME these are supposed to be namespace specific - 'applet': true, 'caption': true, 'html': true, 'table': true, 'td': true, - 'th': true, 'marquee': true, 'object': true, 'template': true, 'mi': true, - 'mo': true, 'mn': true, 'ms': true, 'mtext': true, 'annotation-xml': true, - 'foreignObject': true, 'desc': true, 'title' + applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML, + td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML, + template: NS_HTML, mi: NS_MATHML, + + mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML, + 'annotation-xml': NS_MATHML, + + foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG } - button_scopers = button: true - li_scopers = ol: true, ul: true - table_scopers = html: true, table: true, template: true - is_in_scope = (tag_name) -> - return is_in_scope_x tag_name, standard_scopers - is_in_button_scope = (tag_name) -> - return is_in_scope_x_y tag_name, standard_scopers, button_scopers - is_in_table_scope = (tag_name) -> - return is_in_scope_x tag_name, table_scopers - is_in_select_scope = (tag_name) -> + button_scopers = button: NS_HTML + li_scopers = ol: NS_HTML, ul: NS_HTML + table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML + is_in_scope = (tag_name, namespace = null) -> + return is_in_scope_x tag_name, standard_scopers, namespace + is_in_button_scope = (tag_name, namespace = null) -> + return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace + is_in_table_scope = (tag_name, namespace = null) -> + return is_in_scope_x tag_name, table_scopers, namespace + is_in_select_scope = (tag_name, namespace = null) -> for t in open_els - if t.name is tag_name + if t.name is tag_name and (namespace is null or namespace is t.namespace) return true - if t.name isnt 'optgroup' and t.name isnt 'option' + if t.ns isnt NS_HTML t.name isnt 'optgroup' and t.name isnt 'option' return false return false # this checks for a particular element, not by name @@ -325,7 +393,7 @@ parse_html = (txt, parse_error_cb = null) -> for t in open_els if t is el return true - if t.name of standard_scopers + if standard_scopers[t.name] is t.namespace return false return false @@ -355,6 +423,10 @@ parse_html = (txt, parse_error_cb = null) -> # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm # adoption agency algorithm + # overview here: + # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i + # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p + # http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements adoption_agency = (subject) -> if open_els[0].name is subject el = open_els[0] @@ -370,82 +442,136 @@ parse_html = (txt, parse_error_cb = null) -> if outer >= 8 return outer += 1 + # 5. Let formatting element be the last element in the list of + # active formatting elements that: is between the end of the list + # and the last scope marker in the list, if any, or the start of + # the list otherwise, and has the tag name subject. fe = null - for t, fe_index in afe + for t, fe_of_afe in afe if t.type is TYPE_MARKER break if t.name is subject fe = t break + # If there is no such element, then abort these steps and instead + # act as described in the "any other end tag" entry above. if fe is null in_body_any_other_end_tag subject return + # 6. If formatting element is not in the stack of open elements, + # then this is a parse error; remove the element from the list, and + # abort these steps. in_open_els = false - for t in open_els + for t, fe_of_open_els in open_els if t is fe in_open_els = true break unless in_open_els parse_error() # "remove it from the list" must mean afe, since it's not in open_els - afe.splice fe_index, 1 + afe.splice fe_of_afe, 1 return + # 7. If formatting element is in the stack of open elements, but + # the element is not in scope, then this is a parse error; abort + # these steps. unless el_is_in_scope fe parse_error() return + # 8. If formatting element is not the current node, this is a parse + # error. (But do not abort these steps.) unless open_els[0] is fe parse_error() # continue + # 9. Let furthest block be the topmost node in the stack of open + # elements that is lower in the stack than formatting element, and + # is an element in the special category. There might not be one. fb = null - fb_index + fb_of_open_els = null for t, i in open_els if t is fe break if el_is_special t fb = t - fb_index = i + fb_of_open_els = i + # and continue, to see if there's one that's more "topmost" + # 10. If there is no furthest block, then the UA must first pop all + # the nodes from the bottom of the stack of open elements, from the + # current node up to and including formatting element, then remove + # formatting element from the list of active formatting elements, + # and finally abort these steps. if fb is null loop t = open_els.shift() if t is fe - afe.splice fe_index, 1 + afe.splice fe_of_afe, 1 return - ca = open_els[fe_index + 1] # common ancestor - node_above = open_els[fb_index + 1] # next node if node isn't in open_els anymore + # 11. Let common ancestor be the element immediately above + # formatting element in the stack of open elements. + ca = open_els[fe_of_open_els + 1] # common ancestor + + node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list. bookmark = new_aaa_bookmark() for t, i in afe if t is fe afe.splice i, 0, bookmark + break node = last_node = fb inner = 0 loop inner += 1 + # 3. Let node be the element immediately above node in the + # stack of open elements, or if node is no longer in the stack + # of open elements (e.g. because it got removed by this + # algorithm), the element that was immediately above node in + # the stack of open elements before node was removed. node_next = null for t, i in open_els if t is node node_next = open_els[i + 1] break node = node_next ? node_above + debug_log "inner loop #{inner}" + debug_log "open_els: #{serialize_els open_els, true, true}" + debug_log "tree: #{serialize_els tree.children, false, true}" + debug_log "afe: #{serialize_els afe, true, true}" + debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" + debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" + debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" + debug_log "node: #{node.serialize true, true}" # TODO make sure node_above gets re-set if/when node is removed from open_els + + # 4. If node is formatting element, then go to the next step in + # the overall algorithm. if node is fe break + debug_log "the meat" + # 5. If inner loop counter is greater than three and node is in + # the list of active formatting elements, then remove node from + # the list of active formatting elements. node_in_afe = false - for t, i of afe + for t, i in afe if t is node if inner > 3 afe.splice i, 1 + debug_log "max out inner" else node_in_afe = true + debug_log "in afe" break + # 6. If node is not in the list of active formatting elements, + # then remove node from the stack of open elements and then go + # back to the step labeled inner loop. unless node_in_afe + debug_log "not in afe" for t, i in open_els if t is node node_above = open_els[i + 1] open_els.splice i, 1 break continue - # 7. reate an element for the token for which the element node + debug_log "the bones" + # 7. create an element for the token for which the element node # was created, in the HTML namespace, with common ancestor as # the intended parent; replace the entry for node in the list # of active formatting elements with an entry for the new @@ -456,10 +582,13 @@ parse_html = (txt, parse_error_cb = null) -> for t, i in afe if t is node afe[i] = new_node + debug_log "replaced in afe" break for t, i in open_els if t is node + node_above = open_els[i + 1] open_els[i] = new_node + debug_log "replaced in open_els" break node = new_node # 8. If last node is furthest block, then move the @@ -469,29 +598,73 @@ parse_html = (txt, parse_error_cb = null) -> for t, i in afe if t is bookmark afe.splice i, 1 + debug_log "removed bookmark" + break for t, i in afe if t is node - # TODO test: position i gets you "after"? - afe.splice i, 0, new_aaa_bookmark() + # "after" means lower + afe.splice i, 0, bookmark # "after as <- + debug_log "placed bookmark after node" + debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}" + break # 9. Insert last node into node, first removing it from its # previous parent node if any. if last_node.parent? - for c, i of last_node.parent.children + debug_log "last_node has parent" + for c, i in last_node.parent.children if c is last_node + debug_log "removing last_node from parent" last_node.parent.children.splice i, 1 + break node.children.push last_node last_node.parent = node # 10. Let last node be node. last_node = node + debug_log "at last" # 11. Return to the step labeled inner loop. # 14. Insert whatever last node ended up being in the previous step # at the appropriate place for inserting a node, but using common # ancestor as the override target. - tree_insert_element last_node, ca + + # JASON: In the case where fe is immediately followed by fb: + # * inner loop exits out early (node==fe) + # * last_node is fb + # * last_node is still in the tree (not a duplicate) + if last_node.parent? + debug_log "FEFIRST? last_node has parent" + for c, i in last_node.parent.children + if c is last_node + debug_log "removing last_node from parent" + last_node.parent.children.splice i, 1 + break + + debug_log "after aaa inner loop" + debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" + debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" + debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" + debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}" + debug_log "tree: #{serialize_els tree.children, false, true}" + + debug_log "insert" + + + # can't use standard insert token thing, because it's already in + # open_els and must stay at it's current position in open_els + dest = adjusted_insertion_location ca + dest[0].children.splice dest[1], 0, last_node + last_node.parent = dest[0] + + + debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" + debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" + debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}" + debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}" + debug_log "tree: #{serialize_els tree.children, false, true}" + # 15. Create an element for the token for which formatting element # was created, in the HTML namespace, with furthest block as the # intended parent. - new_element = fe.shallow_clone() + new_element = fe.shallow_clone() # FIXME intended parent thing # 16. Take all of the child nodes of furthest block and append them # to the element created in the last step. while fb.children.length @@ -511,32 +684,39 @@ parse_html = (txt, parse_error_cb = null) -> break for t, i in afe if t is bookmark - afe[i] = node + afe[i] = new_element break # 19. Remove formatting element from the stack of open elements, # and insert the new element into the stack of open elements # immediately below the position of furthest block in that stack. - for t, i of open_els + for t, i in open_els if t is fe open_els.splice i, 1 break - for t, i of open_els + for t, i in open_els if t is fb open_els.splice i, 0, new_element break # 20. Jump back to the step labeled outer loop. + debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}" + debug_log "tree: #{serialize_els tree.children, false, true}" + debug_log "open_els: #{serialize_els open_els, true, true}" + debug_log "afe: #{serialize_els afe, true, true}" + debug_log "AAA DONE" # http://www.w3.org/TR/html5/syntax.html#close-a-p-element - # FIXME implement this + # FIXME test this (particularly emplied end tags) + close_p_element = -> + generate_implied_end_tags 'p' # arg is exception + if open_els[0].name isnt 'p' + parse_error() + while open_els.length > 1 # just in case + t = open_els.shift() + if t.name is 'p' + return close_p_if_in_button_scope = -> - if open_els[0].name is 'p' - open_els.pop() - return - #p = find_button_scope 'p' - #if p? - # TODO generate_implied_end_tags except for p tags - # TODO parse_error unless open_els[0].name is 'p' - # TODO pop stack until 'p' popped + if is_in_button_scope 'p' + close_a_p_element() # http://www.w3.org/TR/html5/syntax.html#insert-a-character tree_insert_text = (t) -> @@ -629,15 +809,35 @@ parse_html = (txt, parse_error_cb = null) -> return el + # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element + insert_foreign_element = (token, namespace) -> + ail = adjusted_insertion_location() + ail_el = ail[0] + ail_i = ail[1] + el = token_to_element token, namespace, ail_el + # TODO skip this next step if it's broken (eg ail_el is document with child already) + el.parent = ail_el + ail_el.children.splice ail_i, 0, el + open_els.unshift el + return el + # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element + insert_html_element = insert_foreign_element # (token, namespace) -> + # FIXME read implement "foster parenting" part # FIXME read spec, do this right # FIXME implement the override target thing # note: this assumes it's an open tag - # TODO tree_insert_html_element = (t, ... + # FIXME what part of the spec is this? + # TODO look through all callers of this, and see what they should really be doing. + # eg probably insert_html_element for tokens tree_insert_element = (el, override_target = null, namespace = null) -> + if namespace? + el.namespace = namespace dest = adjusted_insertion_location override_target if el.type is TYPE_OPEN_TAG # means it's a "token" el = token_to_element el, namespace, dest[0] + unless el.namespace? + namespace = dest.namespace # fixfull: Document nodes sometimes can't accept more chidren dest[0].children.splice dest[1], 0, el el.parent = dest[0] @@ -649,18 +849,23 @@ parse_html = (txt, parse_error_cb = null) -> # FIXME read spec for "adjusted insertion location, etc, this might be wrong open_els[0].children.push t + # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags + # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags + generate_implied_end_tags = (except = null) -> + while end_tag_implied[open_els[0]] and open_els[0].name isnt except + open_els.shift() + # 8.2.5.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it for node, i in open_els - if node.name is name - # FIXME generate implied end tags except those with name==name + if node.name is name # FIXME check namespace too + generate_implied_end_tags name # arg is exception parse_error() unless i is 0 - while i > 0 + while i >= 0 open_els.shift() i -= 1 - open_els.shift() return - if special_elements[node.name]? + if special_elements[node.name]? # FIXME check namespac too parse_error() return tree_in_body = (t) -> @@ -685,7 +890,7 @@ parse_html = (txt, parse_error_cb = null) -> when 'html' parse_error() return if template_tag_is_open() - root_attrs = open_els[open_els.length - 1].children + root_attrs = open_els[open_els.length - 1].attrs for k, v of t.attrs root_attrs[k] = v unless root_attrs[k]? when 'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title' @@ -699,18 +904,47 @@ parse_html = (txt, parse_error_cb = null) -> # TODO when 'address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'nav', 'ol', 'p', 'section', 'summary', 'ul' close_p_if_in_button_scope() - tree_insert_element t + insert_html_element t when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' close_p_if_in_button_scope() if open_els[0].name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] parse_error() open_els.shift() - tree_insert_element t + insert_html_element t # TODO lots more to implement here + when 'a' + # If the list of active formatting elements + # contains an a element between the end of the list and + # the last marker on the list (or the start of the list + # if there is no marker on the list), then this is a + # parse error; run the adoption agency algorithm for + # the tag name "a", then remove that element from the + # list of active formatting elements and the stack of + # open elements if the adoption agency algorithm didn't + # already remove it (it might not have if the element + # is not in table scope). + found = false + for el in afe + if el.type is TYPE_MARKER + break + if el.name is 'a' + found = el + if found? + parse_error() + adoption_agency 'a' + for el, i in afe + if el is found + afe.splice i, 1 + for el, i in open_els + if el is found + open_els.splice i, 1 + reconstruct_active_formatting_elements() + el = tree_insert_element t + afe.unshift el when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u' reconstruct_active_formatting_elements() el = tree_insert_element t - afe.push el + afe.unshift el # TODO lots more to implement here else # any other start tag reconstruct_active_formatting_elements() @@ -738,6 +972,23 @@ parse_html = (txt, parse_error_cb = null) -> parse_error() return # TODO implement parse error and move to tree_after_body, reprocess + when 'address', 'article', 'aside', 'blockquote', 'button', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'listing', 'main', 'nav', 'ol', 'pre', 'section', 'summary', 'ul' + unless is_in_scope t.name, NS_HTML + parse_error() + return + generate_implied_end_tags() + unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML + parse_error() + loop + el = open_els.shift() + if el.name is t.name and el.namespace is NS_HTML + return + # TODO lots more close tags to implement here + when 'p' + unless is_in_button_scope 'p' + parse_error() + insert_html_element new_open_tag 'p' + close_p_element() # TODO lots more close tags to implement here when 'a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u' adoption_agency t.name @@ -1089,7 +1340,7 @@ parse_html = (txt, parse_error_cb = null) -> # tree constructor initialization # see comments on TYPE_TAG/etc for the structure of this data - tree = new Node TYPE_TAG, name: 'html' + tree = new Node TYPE_TAG, name: 'html', namespace: NS_HTML open_els = [tree] tree_state = tree_in_body flag_frameset_ok = true @@ -1115,18 +1366,25 @@ test_equals = (description, output, expected_output) -> console.log "FAILED: \"#{description}\"" console.log " Expected: #{expected_output}" console.log " Actual: #{output}" +serialize_els = (els, shallow, show_ids) -> + serialized = '' + sep = '' + for t in els + serialized += sep + sep = ',' + serialized += t.serialize shallow, show_ids + return serialized test_parser = (args) -> + debug_log_reset() parse_errors = [] errors_cb = (i) -> parse_errors.push i + prev_node_id = 0 # reset counter parsed = parse_html args.html, errors_cb - serialized = '' - sep = '' - for t in parsed - serialized += sep - sep = ',' - serialized += t.serialize() - if serialized isnt args.expected or parse_errors.length isnt args.errors + serialized = serialize_els parsed, false, false + if serialized isnt args.expected # or parse_errors.length isnt args.errors + debug_log_each (str) -> + console.log str console.log "FAILED: \"#{args.name}\"" else console.log "passed \"#{args.name}\"" @@ -1134,8 +1392,8 @@ test_parser = (args) -> console.log " Input: #{args.html}" console.log " Correct: #{args.expected}" console.log " Output: #{serialized}" - if parse_errors.length isnt args.errors - console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}" + if parse_errors.length isnt args.errors + console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}" test_parser name: "empty", \ html: "", @@ -1197,6 +1455,14 @@ test_parser name: "mis-matched formatting elements", \ html: "1234567890", expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"', errors: 1 # no idea how many their should be +test_parser name: "8.2.8.1 Misnested tags: ", \ + html: '

12345

', + expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]', + errors: 1 +test_parser name: "8.2.8.2 Misnested tags:

", \ + html: '1

23

', + expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]', + errors: 1 test_parser name: "crazy formatting elements test", \ html: "second
first
", # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]' @@ -1222,5 +1488,5 @@ test_parser name: "html5lib aaa 4", \ errors: 2 test_parser name: "html5lib aaa 5", \ html: '1
2
34
5
', - expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"]text:"5"]', + expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]', errors: 3 -- 1.7.10.4