X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;f=parse-html.coffee;h=97298740293a1348bce86dbe2586ec4d7de12246;hb=a77c7e459842f78d78161bc4a3f330ec1d0c2693;hp=8404f1adac847e7d80024b02e74c6cd43c4535e7;hpb=ec345979893e45258cfaf79c340470d7d60957bf;p=peach-html5-editor.git

diff --git a/parse-html.coffee b/parse-html.coffee
index 8404f1a..9729874 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -81,9 +81,13 @@
 #   1: b
 #   0: a "end of the list", "current node", "bottommost", "last"
 
-unless module?.exports?
+if (typeof module) isnt 'undefined' and module.exports? # a module loader supplied module.exports (presumably node/CommonJS)
+	context = 'module' # remembered so later code can pick a non-DOM implementation
+	exports = module.exports
+else
+	context = 'browser' # no module.exports available: publish the API on window.wheic instead
 	window.wheic = {}
-	module = exports: window.wheic
+	exports = window.wheic
 
 from_code_point = (x) ->
 	if String.fromCodePoint?
@@ -605,25 +609,29 @@ adjust_foreign_attributes = (t) ->
 
 # decode_named_char_ref()
 #
-# The list of named character references is _huge_ so ask the browser to decode
-# for us instead of wasting bandwidth/space on including the table here.
-#
-# Pass without the "&" but with the ";" examples:
-#    for "&amp" pass "amp;"
-#    for "&#x2032" pass "x2032;"
-g_dncr = {
-	cache: {}
-	textarea: document.createElement('textarea')
-}
-# TODO test this in IE8
+# The list of named character references is _huge_ so if we're running in a
+# browser, we get the browser to decode them, rather than increasing the code
+# size to include the table.
+if context is 'module'
+	_decode_named_char_ref = require './html5-named-entities.coffee' # presumably exports a function (name -> decoded string or null) -- confirm against that file
+else
+	# TODO test this in IE8
+	decode_named_char_ref_el = document.createElement('textarea') # one shared element, reused for every lookup
+	_decode_named_char_ref = (txt) ->
+		txt = "&#{txt};" # rebuild the full reference from the bare name
+		decode_named_char_ref_el.innerHTML = txt
+		decoded = decode_named_char_ref_el.value # read back whatever the browser made of the markup
+		return null if decoded is txt # text came back unchanged: treated as "not a known entity"
+		return decoded
+# Pass the name of a named entity _that has a terminating semicolon_
+# Entities without terminating semicolons should use legacy_char_refs[]
+# Do not include the "&" or ";" in your argument, eg pass "alpha"
+decode_named_char_ref_cache = {}
 decode_named_char_ref = (txt) ->
-	txt = "&#{txt}"
-	decoded = g_dncr.cache[txt]
+	decoded = decode_named_char_ref_cache["&#{txt}"] # "&" prefix: bare names like "constructor" would otherwise hit Object.prototype members
 	return decoded if decoded?
-	g_dncr.textarea.innerHTML = txt
-	decoded = g_dncr.textarea.value
-	return null if decoded is txt
-	return g_dncr.cache[txt] = decoded
+	decoded = _decode_named_char_ref txt # cache miss: ask the context-specific decoder (may return null)
+	return decode_named_char_ref_cache["&#{txt}"] = decoded # store under the same prefixed key and return the result
 
 parse_html = (args) ->
 	txt = null
@@ -4561,35 +4569,33 @@ parse_html = (args) ->
 					# exit early, because parse_error() below needs at least one alnum
 					return '&'
 				if txt.charAt(cur + i) is ';'
-					i += 1 # include ';' terminator in value
 					decoded = decode_named_char_ref txt.substr(cur, i)
+					i += 1 # scan past the ';' (after, so we don't pass it to decode)
 					if decoded?
 						cur += i
 						return decoded
-					parse_error()
-					return '&'
-				else
-					# no ';' terminator (only legacy char refs)
-					max = i
-					for i in [2..max] # no prefix matches, so ok to check shortest first
-						c = legacy_char_refs[txt.substr(cur, i)]
-						if c?
-							if in_attr
-								if txt.charAt(cur + i) is '='
-									# "because some legacy user agents will
-									# misinterpret the markup in those cases"
-									parse_error()
-									return '&'
-								if alnum.indexOf(txt.charAt(cur + i)) > -1
-									# this makes attributes forgiving about url args
-									return '&'
-							# ok, and besides the weird exceptions for attributes...
-							# return the matching char
-							cur += i # consume entity chars
-							parse_error() # because no terminating ";"
-							return c
-					parse_error()
-					return '&'
+					# else FALL THROUGH (check for match without last char(s) or ";")
+				# no ';' terminator (only legacy char refs)
+				max = i
+				for i in [2..max] # no prefix matches, so ok to check shortest first
+					c = legacy_char_refs[txt.substr(cur, i)]
+					if c?
+						if in_attr
+							if txt.charAt(cur + i) is '='
+								# "because some legacy user agents will
+								# misinterpret the markup in those cases"
+								parse_error()
+								return '&'
+							if alnum.indexOf(txt.charAt(cur + i)) > -1
+								# this makes attributes forgiving about url args
+								return '&'
+						# ok, and besides the weird exceptions for attributes...
+						# return the matching char
+						cur += i # consume entity chars
+						parse_error() # because no terminating ";"
+						return c
+				parse_error()
+				return '&'
 		return # never reached
 
 	eat_next_token_if_newline = ->
@@ -4733,16 +4739,16 @@ parse_html = (args) ->
 		return fragment_root.children
 	return doc.children
 
-module.exports.parse_html = parse_html
-module.exports.debug_log_reset = debug_log_reset
-module.exports.debug_log_each = debug_log_each
-module.exports.TYPE_TAG = TYPE_TAG
-module.exports.TYPE_TEXT = TYPE_TEXT
-module.exports.TYPE_COMMENT = TYPE_COMMENT
-module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
-module.exports.NS_HTML = NS_HTML
-module.exports.NS_MATHML = NS_MATHML
-module.exports.NS_SVG = NS_SVG
-module.exports.QUIRKS_NO = QUIRKS_NO
-module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED
-module.exports.QUIRKS_YES = QUIRKS_YES
+exports.parse_html = parse_html # `exports` is module.exports under a module loader, window.wheic in a browser
+exports.debug_log_reset = debug_log_reset
+exports.debug_log_each = debug_log_each
+exports.TYPE_TAG = TYPE_TAG
+exports.TYPE_TEXT = TYPE_TEXT
+exports.TYPE_COMMENT = TYPE_COMMENT
+exports.TYPE_DOCTYPE = TYPE_DOCTYPE
+exports.NS_HTML = NS_HTML
+exports.NS_MATHML = NS_MATHML
+exports.NS_SVG = NS_SVG
+exports.QUIRKS_NO = QUIRKS_NO
+exports.QUIRKS_LIMITED = QUIRKS_LIMITED
+exports.QUIRKS_YES = QUIRKS_YES