From: Jason Woofenden Date: Mon, 14 Dec 2015 03:07:41 +0000 (-0500) Subject: parse unquoted and singlequoted attributes X-Git-Url: https://jasonwoof.com/gitweb/?a=commitdiff_plain;h=06466aac55914c23fc6ba986c28ccf069386767c;p=peach-html5-editor.git parse unquoted and singlequoted attributes --- diff --git a/parse-html.coffee b/parse-html.coffee index 204c6ff..1ef077a 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -361,11 +361,8 @@ parse_html = (txt) -> tok_cur_tag = null return tmp else - if uc_alpha.indexOf(c) > -1 - tok_cur_tag[2][0][1] += c.toLowerCase() - else - # Parse error if ", ` or < (that's a backtick) - tok_cur_tag[2][0][1] += c + tok_cur_tag[2][0][1] += c + tok_state = tok_state_attribute_value_unquoted return null # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state @@ -379,11 +376,45 @@ parse_html = (txt) -> when "\u0000" # Parse error tok_cur_tag[2][0][1] += "\ufffd" - tok_state = tok_state_attribute_value_unquoted else tok_cur_tag[2][0][1] += c return null + # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state + tok_state_attribute_value_single_quoted = -> + switch c = txt.charAt(cur++) + when "'" + tok_state = tok_state_after_attribute_value_quoted + when '&' + tok_state = tok_state_character_reference_in_attribute_value + tok_char_ref_addl_allowed = "'" # FIXME + when "\u0000" + # Parse error + tok_cur_tag[2][0][1] += "\ufffd" + else + tok_cur_tag[2][0][1] += c + return null + + # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state + tok_state_attribute_value_unquoted = -> + switch c = txt.charAt(cur++) + when "\t", "\n", "\u000c", ' ' + tok_state = tok_state_before_attribute_name + when '&' + tok_state = tok_state_character_reference_in_attribute_value + tok_char_ref_addl_allowed = '>' # FIXME + when '>' + tok_state = tok_state_data + tmp = tok_cur_tag + tok_cur_tag = null + return tmp + when "\u0000" + tok_cur_tag[2][0][1] += "\ufffd" + 
else + # Parse Error if ', <, = or ` (backtick) + tok_cur_tag[2][0][1] += c + return null + # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state tok_state_after_attribute_value_quoted = -> switch c = txt.charAt(cur++) @@ -450,4 +481,5 @@ test_equals "named entity", html_to_json, "a&amp;1234", '[[1,"a&1234"]]' test_equals "broken named character references", html_to_json, "1&2&&3&aabbcc;", '[[1,"1&2&&3&aabbcc;"]]' test_equals "numbered entity overrides", html_to_json, "1€€ ƒ", '[[1,"1€€ ƒ"]]' test_equals "open tag", html_to_json, "foo<span>bar", '[[1,"foo"],[0,"span",{},[[1,"bar"]]]]' -test_equals "open tag with attributes", html_to_json, "foo<span style=\"foo: bar\">bar", '[[1,"foo"],[0,"span",{"style":"foo: bar"},[[1,"bar"]]]]' +test_equals "open tag with attributes", html_to_json, "foo<span style=\"foo: bar\" title=\"hi\">bar", '[[1,"foo"],[0,"span",{"style":"foo: bar","title":"hi"},[[1,"bar"]]]]' +test_equals "open tag with attributes of various quotings", html_to_json, "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar", '[[1,"foo"],[0,"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\\"","autofocus":""},[[1,"bar"]]]]'