X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;ds=sidebyside;f=parse-html.coffee;h=20bc99c39de4543dc344ad4f79e8ff9e39f2ac85;hb=e7cac479773cf44f9ae28a3f559cef095b19e361;hp=b3b3f7fd9d51dcc65e9bc237b43ec16afb06e736;hpb=fcaaf0f85eb620893b85f5efcb8d894b68793cd9;p=peach-html5-editor.git diff --git a/parse-html.coffee b/parse-html.coffee index b3b3f7f..20bc99c 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -1780,17 +1780,23 @@ parse_html = (args) -> # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it - for el, i in open_els - if el.name is name and el.namespace is NS_HTML + node = open_els[0] + loop + if node.name is name and node.namespace is NS_HTML generate_implied_end_tags name # arg is exception - parse_error() unless i is 0 - while i >= 0 - open_els.shift() - i -= 1 - return - if special_elements[el.name] is el.namespace + unless node is open_els[0] + parse_error() + loop + el = open_els.shift() + if el is node + return + if special_elements[node.name] is node.namespace parse_error() return + for el, i in open_els + if node is el + node = open_els[i + 1] + break return ins_mode_in_body = (t) -> if t.type is TYPE_TEXT and t.text is "\u0000" @@ -1918,11 +1924,7 @@ parse_html = (args) -> if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing') close_p_if_in_button_scope() insert_html_element t - # spec: If the next token is a "LF" (U+000A) character token, then - # ignore that token and move on to the next one. (Newlines at the - # start of pre blocks are ignored as an authoring convenience.) - if txt.charAt(cur) is "\u000a" # FIXME check for crlf? - cur += 1 + eat_next_token_if_newline() flag_frameset_ok = false return if t.type is TYPE_START_TAG and t.name is 'form' @@ -2117,6 +2119,10 @@ parse_html = (args) -> return if t.type is TYPE_START_TAG and t.name is 'nobr' reconstruct_afe() + if is_in_scope 'nobr', NS_HTML + parse_error() + adoption_agency 'nobr' + reconstruct_afe() el = insert_html_element t afe_push el return @@ -2151,7 +2157,8 @@ parse_html = (args) -> return if t.type is TYPE_END_TAG and t.name is 'br' parse_error() - t.type = TYPE_START_TAG + # W3C: t.type = TYPE_START_TAG + t = new_open_tag 'br' # WHATWG # fall through if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr') reconstruct_afe() @@ -2168,7 +2175,8 @@ parse_html = (args) -> unless is_input_hidden_tok t flag_frameset_ok = false return - if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track') + if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track') + # WHATWG adds 'menuitem' for this block insert_html_element t open_els.shift() t.acknowledge_self_closing() @@ -2228,8 +2236,7 @@ parse_html = (args) -> return if t.type is TYPE_START_TAG and t.name is 'textarea' insert_html_element t - if txt.charAt(cur) is "\u000a" # FIXME check for crlf? - cur += 1 + eat_next_token_if_newline() tok_state = tok_state_rcdata original_ins_mode = ins_mode flag_frameset_ok = false @@ -2724,7 +2731,7 @@ parse_html = (args) -> insert_html_element t return if t.type is TYPE_END_TAG and t.name is 'optgroup' - if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML + if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML open_els.shift() if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML @@ -2760,7 +2767,7 @@ parse_html = (args) -> return if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea') parse_error() - if is_in_select_scope 'select', NS_HTML + unless is_in_select_scope 'select', NS_HTML return loop el = open_els.shift() @@ -3086,7 +3093,7 @@ parse_html = (args) -> tok_state = tok_state_tag_open when "\u0000" parse_error() - return new_text_node "\ufffd" + return new_text_node c when '' # EOF return new_eof_token() else @@ -3783,7 +3790,7 @@ parse_html = (args) -> return if c is '>' tok_state = tok_state_data - return + return tok_cur_tag if is_uc_alpha(c) tok_cur_tag.attrs_a.unshift [c.toLowerCase(), ''] tok_state = tok_state_attribute_name @@ -4536,6 +4543,7 @@ parse_html = (args) -> else val = txt.substr cur, (next_gt - cur) cur = next_gt + 3 + val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") if val.length > 0 return new_character_token val # fixfull split return null @@ -4629,6 +4637,24 @@ parse_html = (args) -> return '&' return # never reached + eat_next_token_if_newline = -> + old_cur = cur + t = null + until t? + t = tok_state() + if t.type is TYPE_TEXT + # definition of a newline depends on whether it was a character ref or not + if cur - old_cur is 1 + # not a character reference + if t.text is "\u000d" or t.text is "\u000a" + return + else + if t.text is "\u000a" + return + # not a "newline" + cur = old_cur + return + # tree constructor initialization # see comments on TYPE_TAG/etc for the structure of this data txt = args.html @@ -4657,19 +4683,20 @@ parse_html = (args) -> # text pre-processing # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream - txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this - if args.name is "tests23.dat #1" + if args.name is "webkit01.dat #12" console.log "hi" # proccess input # http://www.w3.org/TR/html5/syntax.html#tree-construction - while flag_parsing - t = tok_state() - if t? - process_token t - # fixfull parse error if has self-closing flag, but it wasn't acknolwedged + parse_main_loop = -> + while flag_parsing + t = tok_state() + if t? + process_token t + # fixfull parse error if has self-closing flag, but it wasn't acknolwedged + parse_main_loop() return doc.children serialize_els = (els, shallow, show_ids) ->