From 5721b575d9c3ee1cd6f0becdb7865acc93d98b92 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Sun, 13 Dec 2015 21:38:26 -0500 Subject: [PATCH] code cleanup: sort fn defs in spec order --- parse-html.coffee | 121 ++++++++++++++++++++++++++--------------------------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index 7ed736e..204c6ff 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -177,6 +177,66 @@ parse_html = (txt) -> return [TYPE_TEXT, c] return null + # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state + # & just got consumed + tok_state_character_reference_in_data = -> + tok_state = tok_state_data + if cur >= txt.length + return [TYPE_TEXT, '&'] + switch c = txt.charAt(cur) + when ';' + return [TYPE_TEXT, '&'] + when '#' + if cur + 1 >= txt.length + return [TYPE_TEXT, '&'] + if txt.charAt(cur + 1).toLowerCase() is 'x' + prefix = '#x' + charset = hex_chars + start = cur + 2 + else + charset = digits + start = cur + 1 + prefix = '#' + i = 0 + while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1 + i += 1 + if i is 0 + return [TYPE_TEXT, '&'] + if txt.charAt(start + i) is ';' + i += 1 + decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase()) + if decoded? + cur = start + i + return [TYPE_TEXT, decoded] + return [TYPE_TEXT, '&'] + else + for i in [0...31] + if alnum.indexOf(txt.charAt(cur + i)) is -1 + break + if i is 0 + return [TYPE_TEXT, '&'] + if txt.charAt(cur + i) is ';' + i += 1 # include ';' terminator in value + decoded = decode_named_char_ref txt.substr(cur, i) + if decoded? + cur += i + return [TYPE_TEXT, decoded] + return [TYPE_TEXT, '&'] + else + # no ';' terminator (only legacy char refs) + if i < 2 or i > 6 + return [TYPE_TEXT, '&'] + # FIXME: if we're inside an attribute: + # 1. don't parse refs that are followed by = + # 2. don't parse refs that are followed by alnum + max = i + for i in [2..max] # no prefix matches, so ok to check shortest first + c = legacy_char_refs[txt.substr(cur, i)] + if c? + cur += i # consume entity chars + return [TYPE_TEXT, c] + return null + # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state tok_state_tag_open = -> switch c = txt.charAt(cur++) @@ -342,67 +402,6 @@ parse_html = (txt) -> cur -= 1 # we didn't handle that char return null - - # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state - # & just got consumed - tok_state_character_reference_in_data = -> - tok_state = tok_state_data - if cur >= txt.length - return [TYPE_TEXT, '&'] - switch c = txt.charAt(cur) - when ';' - return [TYPE_TEXT, '&'] - when '#' - if cur + 1 >= txt.length - return [TYPE_TEXT, '&'] - if txt.charAt(cur + 1).toLowerCase() is 'x' - prefix = '#x' - charset = hex_chars - start = cur + 2 - else - charset = digits - start = cur + 1 - prefix = '#' - i = 0 - while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1 - i += 1 - if i is 0 - return [TYPE_TEXT, '&'] - if txt.charAt(start + i) is ';' - i += 1 - decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase()) - if decoded? - cur = start + i - return [TYPE_TEXT, decoded] - return [TYPE_TEXT, '&'] - else - for i in [0...31] - if alnum.indexOf(txt.charAt(cur + i)) is -1 - break - if i is 0 - return [TYPE_TEXT, '&'] - if txt.charAt(cur + i) is ';' - i += 1 # include ';' terminator in value - decoded = decode_named_char_ref txt.substr(cur, i) - if decoded? - cur += i - return [TYPE_TEXT, decoded] - return [TYPE_TEXT, '&'] - else - # no ';' terminator (only legacy char refs) - if i < 2 or i > 6 - return [TYPE_TEXT, '&'] - # FIXME: if we're inside an attribute: - # 1. don't parse refs that are followed by = - # 2. don't parse refs that are followed by alnum - max = i - for i in [2..max] # no prefix matches, so ok to check shortest first - c = legacy_char_refs[txt.substr(cur, i)] - if c? - cur += i # consume entity chars - return [TYPE_TEXT, c] - return null - # the functions below impliment the Tree Contstruction algorithm here: # http://www.w3.org/TR/html5/syntax.html#tree-construction # FIXME this is just a bit of a hack that makes sense... read spec and do it that way -- 1.7.10.4