X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;f=parse-html.coffee;h=97298740293a1348bce86dbe2586ec4d7de12246;hb=a77c7e459842f78d78161bc4a3f330ec1d0c2693;hp=8404f1adac847e7d80024b02e74c6cd43c4535e7;hpb=ec345979893e45258cfaf79c340470d7d60957bf;p=peach-html5-editor.git diff --git a/parse-html.coffee b/parse-html.coffee index 8404f1a..9729874 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -81,9 +81,13 @@ # 1: b # 0: a "end of the list", "current node", "bottommost", "last" -unless module?.exports? +if (typeof module) isnt 'undefined' and module.exports? + context = 'module' + exports = module.exports +else + context = 'browser' window.wheic = {} - module = exports: window.wheic + exports = window.wheic from_code_point = (x) -> if String.fromCodePoint? @@ -605,25 +609,29 @@ adjust_foreign_attributes = (t) -> # decode_named_char_ref() # -# The list of named character references is _huge_ so ask the browser to decode -# for us instead of wasting bandwidth/space on including the table here. -# -# Pass without the "&" but with the ";" examples: -# for "&" pass "amp;" -# for "′" pass "x2032;" -g_dncr = { - cache: {} - textarea: document.createElement('textarea') -} -# TODO test this in IE8 +# The list of named character references is _huge_ so if we're running in a +# browser, we get the browser to decode them, rather than increasing the code +# size to include the table. +if context is 'module' + _decode_named_char_ref = require './html5-named-entities.coffee' +else + # TODO test this in IE8 + decode_named_char_ref_el = document.createElement('textarea') + _decode_named_char_ref = (txt) -> + txt = "&#{txt};" + decode_named_char_ref_el.innerHTML = txt + decoded = decode_named_char_ref_el.value + return null if decoded is txt + return decoded +# Pass the name of a named entity _that has a terminating semicolon_ +# Entities without terminating semicolons should use legacy_char_refs[] +# Do not include the "&" or ";" in your argument, eg pass "alpha" +decode_named_char_ref_cache = {} decode_named_char_ref = (txt) -> - txt = "&#{txt}" - decoded = g_dncr.cache[txt] + decoded = decode_named_char_ref_cache[txt] return decoded if decoded? - g_dncr.textarea.innerHTML = txt - decoded = g_dncr.textarea.value - return null if decoded is txt - return g_dncr.cache[txt] = decoded + decoded = _decode_named_char_ref txt + return decode_named_char_ref_cache[txt] = decoded parse_html = (args) -> txt = null @@ -4561,35 +4569,33 @@ parse_html = (args) -> # exit early, because parse_error() below needs at least one alnum return '&' if txt.charAt(cur + i) is ';' - i += 1 # include ';' terminator in value decoded = decode_named_char_ref txt.substr(cur, i) + i += 1 # scan past the ';' (after, so we dno't pass it to decode) if decoded? cur += i return decoded - parse_error() - return '&' - else - # no ';' terminator (only legacy char refs) - max = i - for i in [2..max] # no prefix matches, so ok to check shortest first - c = legacy_char_refs[txt.substr(cur, i)] - if c? - if in_attr - if txt.charAt(cur + i) is '=' - # "because some legacy user agents will - # misinterpret the markup in those cases" - parse_error() - return '&' - if alnum.indexOf(txt.charAt(cur + i)) > -1 - # this makes attributes forgiving about url args - return '&' - # ok, and besides the weird exceptions for attributes... - # return the matching char - cur += i # consume entity chars - parse_error() # because no terminating ";" - return c - parse_error() - return '&' + # else FALL THROUGH (check for match without last char(s) or ";") + # no ';' terminator (only legacy char refs) + max = i + for i in [2..max] # no prefix matches, so ok to check shortest first + c = legacy_char_refs[txt.substr(cur, i)] + if c? + if in_attr + if txt.charAt(cur + i) is '=' + # "because some legacy user agents will + # misinterpret the markup in those cases" + parse_error() + return '&' + if alnum.indexOf(txt.charAt(cur + i)) > -1 + # this makes attributes forgiving about url args + return '&' + # ok, and besides the weird exceptions for attributes... + # return the matching char + cur += i # consume entity chars + parse_error() # because no terminating ";" + return c + parse_error() + return '&' return # never reached eat_next_token_if_newline = -> @@ -4733,16 +4739,16 @@ parse_html = (args) -> return fragment_root.children return doc.children -module.exports.parse_html = parse_html -module.exports.debug_log_reset = debug_log_reset -module.exports.debug_log_each = debug_log_each -module.exports.TYPE_TAG = TYPE_TAG -module.exports.TYPE_TEXT = TYPE_TEXT -module.exports.TYPE_COMMENT = TYPE_COMMENT -module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE -module.exports.NS_HTML = NS_HTML -module.exports.NS_MATHML = NS_MATHML -module.exports.NS_SVG = NS_SVG -module.exports.QUIRKS_NO = QUIRKS_NO -module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED -module.exports.QUIRKS_YES = QUIRKS_YES +exports.parse_html = parse_html +exports.debug_log_reset = debug_log_reset +exports.debug_log_each = debug_log_each +exports.TYPE_TAG = TYPE_TAG +exports.TYPE_TEXT = TYPE_TEXT +exports.TYPE_COMMENT = TYPE_COMMENT +exports.TYPE_DOCTYPE = TYPE_DOCTYPE +exports.NS_HTML = NS_HTML +exports.NS_MATHML = NS_MATHML +exports.NS_SVG = NS_SVG +exports.QUIRKS_NO = QUIRKS_NO +exports.QUIRKS_LIMITED = QUIRKS_LIMITED +exports.QUIRKS_YES = QUIRKS_YES