+ # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
+ #
+ # Consume a character reference. Expects "cur" to index the character that
+ # follows the "&" in "txt". Don't set this as a state, just call it.
+ #
+ # allowed_char: the "additional allowed character" (if any). An "&" directly
+ #     followed by it is not treated as a character reference.
+ # in_attr: pass true when the reference is inside an attribute value. This
+ #     enables the historical exceptions for legacy (unterminated) references.
+ #
+ # Returns a string (NOT a text node): either the decoded character(s), with
+ # "cur" advanced past the reference, or '&' with "cur" left where it was
+ # when nothing could be consumed.
+ tokenize_character_reference = (allowed_char = null, in_attr = false) ->
+   if cur >= txt.length
+     return '&'
+   switch c = txt.charAt(cur)
+     when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
+       # explicitly not a parse error
+       return '&'
+     when ';'
+       # there has to be "one or more" alnums between & and ; to be a parse error
+       return '&'
+     when '#'
+       # numeric reference: "#" + decimal digits, or "#x"/"#X" + hex digits
+       if cur + 1 >= txt.length
+         return '&'
+       if txt.charAt(cur + 1).toLowerCase() is 'x'
+         prefix = '#x'
+         charset = hex_chars
+         start = cur + 2
+       else
+         charset = digits
+         start = cur + 1
+         prefix = '#'
+       # count the digits that follow the prefix
+       i = 0
+       while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
+         i += 1
+       if i is 0
+         # no digits at all: not a character reference
+         return '&'
+       if txt.charAt(start + i) is ';'
+         i += 1 # include ';' terminator in value
+       else
+         # spec: a numeric reference without the closing ';' is a parse error
+         parse_error()
+       # FIXME This is supposed to generate parse errors for some chars
+       decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase())
+       if decoded?
+         cur = start + i
+         return decoded
+       return '&'
+     else
+       # named reference: count the leading alnums (at most 31). The explicit
+       # bounds check matters because charAt() returns '' past the end of the
+       # text, and indexOf('') on a string reports a match.
+       for i in [0...31]
+         if cur + i >= txt.length or alnum.indexOf(txt.charAt(cur + i)) is -1
+           break
+       if i is 0
+         # exit early, because parse_error() below needs at least one alnum
+         return '&'
+       max = i
+       if txt.charAt(cur + max) is ';'
+         # try the whole name, including its ';' terminator
+         decoded = decode_named_char_ref txt.substr(cur, max + 1)
+         if decoded?
+           cur += max + 1
+           return decoded
+         # Unknown name. Don't give up yet: the spec wants the longest known
+         # prefix, so "&notit;" still has to match the legacy name "not".
+       # Only legacy char refs may be matched without a ';' terminator.
+       # "by 1" keeps the range from counting downwards when max < 2.
+       for i in [2..max] by 1 # no prefix matches, so ok to check shortest first
+         c = legacy_char_refs[txt.substr(cur, i)]
+         # own-property check so that inherited names such as "constructor"
+         # or "toString" are never mistaken for character references
+         if c? and Object::hasOwnProperty.call(legacy_char_refs, txt.substr(cur, i))
+           if in_attr
+             if txt.charAt(cur + i) is '='
+               # "because some legacy user agents will
+               # misinterpret the markup in those cases"
+               parse_error()
+               return '&'
+             if cur + i < txt.length and alnum.indexOf(txt.charAt(cur + i)) > -1
+               # this makes attributes forgiving about url args
+               return '&'
+           # ok, and besides the weird exceptions for attributes...
+           # return the matching char
+           cur += i # consume entity chars
+           parse_error() # because no terminating ";"
+           return c
+       # nothing matched: report it and leave the text after "&" untouched
+       parse_error()
+       return '&'
+   return # never reached
+