# HTML parser meant to run in a browser, in support of WYSIWYG editor
# Copyright 2015 Jason Woofenden
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


# This file implements a parser for html snippets, meant to be used by a
# WYSIWYG editor.

# The implementation is a pretty direct implementation of the parsing algorithm
# described here:
# http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
#
# Deviations from that spec:
#
#   Purposeful: search this file for "WHATWG"
#
#   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"


# stacks/lists
#
# the spec uses many different words to indicate which ends of lists/stacks
# it is talking about (and relative movement within the lists/stacks). This
# section explains them. I'm implementing "lists" (afe and open_els) the same
# way (both as stacks)
#
# stacks grow downward (current element is index=0)
#
# example: open_els = [a, b, c, d, e, f, g]
#
# "grows downwards" means it's visualized like this: (index: el, names)
#
#   6: g "start of the list", "topmost", "first"
#   5: f
#   4: e "previous" (to d), "above", "before"
#   3: d   (previous/next are relative to this element)
#   2: c "next", "after", "lower", "below"
#   1: b
#   0: a "end of the list", "current node", "bottommost", "last"


# browser
# note: to get this to run outside a browser, you'll have to write a native
# implementation of decode_named_char_ref()
# When there is no usable `module.exports`, create window.wheic and make a
# local `module` object whose `exports` property is that same object.
unless module?.exports?
	window.wheic = {}
	module = exports: window.wheic

# Turn a Unicode code point (a number) into a string.
# Prefers the native String.fromCodePoint; without it, code points above
# 0xffff are encoded by hand as a UTF-16 surrogate pair.
from_code_point = (x) ->
	return String.fromCodePoint x if String.fromCodePoint?
	# fits in a single UTF-16 code unit
	return String.fromCharCode x if x <= 0xffff
	# offset into the supplementary planes: upper bits pick the high
	# surrogate, the low 10 bits pick the low surrogate
	x -= 0x10000
	return String.fromCharCode 0xd800 + (x >> 10), 0xdc00 + (x % 0x400)

# Each node is an object of the Node class. Here are the Node types:
TYPE_TAG = 0 # name, {attributes}, [children]
TYPE_TEXT = 1 # "text"
TYPE_COMMENT = 2
TYPE_DOCTYPE = 3
# the following types are emitted by the tokenizer, but shouldn't end up in the tree:
TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
TYPE_END_TAG = 5 # name
TYPE_EOF = 6
TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

# namespace constants (stored in Node's `namespace` field; NS_HTML is the default)
NS_HTML = 1
NS_MATHML = 2
NS_SVG = 3

# quirks mode constants
# NOTE(review): the code that reads these is outside this section — confirm usage
QUIRKS_NO = 1
QUIRKS_LIMITED = 2
QUIRKS_YES = 3

# In-memory debug log: messages pile up in g_debug_log until it is reset.
g_debug_log = []
# forget everything logged so far
debug_log_reset = -> g_debug_log = []
# append one message
debug_log = (str) -> g_debug_log.push str
# call cb with each logged message, in the order they were logged
debug_log_each = (cb) ->
	cb str for str in g_debug_log

# counter behind the automatically assigned Node ids
prev_node_id = 0
# Node is used both for tokenizer tokens and for nodes of the parsed tree; the
# TYPE_* constant stored in @type says which kind a given instance is.
class Node
	# type: one of the TYPE_* constants
	# args: optional initial field values (name, text, attrs, attrs_a,
	#       children, namespace, parent, token, flags, id)
	constructor: (type, args = {}) ->
		@type = type # one of the TYPE_* constants above
		@name = args.name ? '' # tag name
		@text = args.text ? '' # contents for text/comment nodes
		@attrs = args.attrs ? {}
		# attrs in progress, TYPE_START_TAG only. This used to be read only
		# from the misspelled key "attr_k", so a caller passing "attrs_a" was
		# silently ignored. "attr_k" is still accepted for compatibility.
		@attrs_a = args.attrs_a ? args.attr_k ? []
		@children = args.children ? []
		@namespace = args.namespace ? NS_HTML
		@parent = args.parent ? null
		@token = args.token ? null
		@flags = args.flags ? {}
		# a supplied id gets a "+" suffix; otherwise use the next counter value
		if args.id?
			@id = "#{args.id}+"
		else
			@id = "#{++prev_node_id}"
	# Record that the self-closing flag was acknowledged: on the token this
	# node was made from when there is one, otherwise on this node itself.
	acknowledge_self_closing: ->
		if @token?
			@token.flag 'did_self_close', true
		else
			@flag 'did_self_close', true
	# With a non-null value: store it under key. Otherwise: return whatever
	# is stored under key (so a flag cannot be set to null this way).
	flag: (key, value = null) ->
		if value?
			@flags[key] = value
		else
			return @flags[key]
	# Build a compact text form of this node (and, unless shallow, of its
	# attributes and children).
	#   shallow: for tags, stop after the name (and id)
	#   show_ids: for tags, include "#<id>,"
	serialize: (shallow = false, show_ids = false) -> # for unit tests
		ret = ''
		switch @type
			when TYPE_TAG
				ret += 'tag:'
				ret += JSON.stringify @name
				ret += ','
				if show_ids
					ret += "##{@id},"
				if shallow
					break
				# attributes are emitted sorted by name so output is stable
				attr_keys = []
				for k of @attrs
					attr_keys.push k
				attr_keys.sort()
				ret += '{'
				sep = ''
				for k in attr_keys
					ret += sep
					sep = ','
					ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
				ret += '},['
				sep = ''
				for c in @children
					ret += sep
					sep = ','
					ret += c.serialize shallow, show_ids
				ret += ']'
			when TYPE_TEXT
				ret += 'text:'
				ret += JSON.stringify @text
			when TYPE_COMMENT
				ret += 'comment:'
				ret += JSON.stringify @text
			when TYPE_DOCTYPE
				ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
			when TYPE_AFE_MARKER
				ret += 'marker'
			when TYPE_AAA_BOOKMARK
				ret += 'aaa_bookmark'
			else
				ret += 'unknown:'
				console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
		return ret

# Node factories. Each one takes only what the parser normally knows at the
# moment it creates that kind of node.
new_open_tag = (name) -> new Node TYPE_START_TAG, {name}
new_end_tag = (name) -> new Node TYPE_END_TAG, {name}
new_element = (name) -> new Node TYPE_TAG, {name}
new_text_node = (txt) -> new Node TYPE_TEXT, {text: txt}
# character tokens and text nodes are built the same way
new_character_token = new_text_node
new_comment_token = (txt) -> new Node TYPE_COMMENT, {text: txt}
new_doctype_token = (name) -> new Node TYPE_DOCTYPE, {name}
new_eof_token = -> new Node TYPE_EOF
new_afe_marker = -> new Node TYPE_AFE_MARKER
new_aaa_bookmark = -> new Node TYPE_AAA_BOOKMARK

# Character classes, kept as plain strings that are searched with indexOf().
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = "#{lc_alpha}#{uc_alpha}#{digits}"
hex_chars = "#{digits}abcdefABCDEF"

# true only for a single upper-case ASCII letter
is_uc_alpha = (str) ->
	return false unless str.length is 1
	return uc_alpha.indexOf(str) isnt -1
# true only for a single lower-case ASCII letter
is_lc_alpha = (str) ->
	return false unless str.length is 1
	return lc_alpha.indexOf(str) isnt -1

# some SVG elements have dashes in them
tag_name_chars = "#{alnum}-"

# http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"
# true only for a one-character string holding one of the space characters
is_space = (txt) ->
	return false unless txt.length is 1
	return space_chars.indexOf(txt) isnt -1
# same test, but for a token: it must be a text token holding exactly one
# space character
is_space_tok = (t) ->
	return false unless t.type is TYPE_TEXT
	return false unless t.text.length is 1
	return space_chars.indexOf(t.text) isnt -1

# True when t is a start tag token whose first "type" entry in attrs_a has
# the value "hidden" (compared case-insensitively).
is_input_hidden_tok = (t) ->
	return false if t.type isnt TYPE_START_TAG
	for a in t.attrs_a when a[0] is 'type'
		# only the first "type" entry found is considered
		return a[1].toLowerCase() is 'hidden'
	return false

# https://en.wikipedia.org/wiki/Whitespace_character#Unicode
whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"

# Replacement characters keyed by code point: 0x00 maps to U+FFFD, and the
# listed code points in 0x80..0x9F map to other characters.
# NOTE(review): looks like the numeric-character-reference override table
# (Windows-1252 interpretations) from the HTML spec; the code that consults it
# is outside this section — confirm.
unicode_fixes = {}
unicode_fixes[0x00] = "\uFFFD"
unicode_fixes[0x80] = "\u20AC"
unicode_fixes[0x82] = "\u201A"
unicode_fixes[0x83] = "\u0192"
unicode_fixes[0x84] = "\u201E"
unicode_fixes[0x85] = "\u2026"
unicode_fixes[0x86] = "\u2020"
unicode_fixes[0x87] = "\u2021"
unicode_fixes[0x88] = "\u02C6"
unicode_fixes[0x89] = "\u2030"
unicode_fixes[0x8A] = "\u0160"
unicode_fixes[0x8B] = "\u2039"
unicode_fixes[0x8C] = "\u0152"
unicode_fixes[0x8E] = "\u017D"
unicode_fixes[0x91] = "\u2018"
unicode_fixes[0x92] = "\u2019"
unicode_fixes[0x93] = "\u201C"
unicode_fixes[0x94] = "\u201D"
unicode_fixes[0x95] = "\u2022"
unicode_fixes[0x96] = "\u2013"
unicode_fixes[0x97] = "\u2014"
unicode_fixes[0x98] = "\u02DC"
unicode_fixes[0x99] = "\u2122"
unicode_fixes[0x9A] = "\u0161"
unicode_fixes[0x9B] = "\u203A"
unicode_fixes[0x9C] = "\u0153"
unicode_fixes[0x9E] = "\u017E"
unicode_fixes[0x9F] = "\u0178"

# List of lower-case string prefixes.
# NOTE(review): going by the name, these are doctype public identifier
# prefixes that select QUIRKS_YES; the code that reads the list is outside
# this section — confirm (including that the caller lower-cases its input).
quirks_yes_pi_prefixes = [
	"+//silmaril//dtd html pro v0r11 19970101//"
	"-//as//dtd html 3.0 aswedit + extensions//"
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
	"-//ietf//dtd html 2.0 level 1//"
	"-//ietf//dtd html 2.0 level 2//"
	"-//ietf//dtd html 2.0 strict level 1//"
	"-//ietf//dtd html 2.0 strict level 2//"
	"-//ietf//dtd html 2.0 strict//"
	"-//ietf//dtd html 2.0//"
	"-//ietf//dtd html 2.1e//"
	"-//ietf//dtd html 3.0//"
	"-//ietf//dtd html 3.2 final//"
	"-//ietf//dtd html 3.2//"
	"-//ietf//dtd html 3//"
	"-//ietf//dtd html level 0//"
	"-//ietf//dtd html level 1//"
	"-//ietf//dtd html level 2//"
	"-//ietf//dtd html level 3//"
	"-//ietf//dtd html strict level 0//"
	"-//ietf//dtd html strict level 1//"
	"-//ietf//dtd html strict level 2//"
	"-//ietf//dtd html strict level 3//"
	"-//ietf//dtd html strict//"
	"-//ietf//dtd html//"
	"-//metrius//dtd metrius presentational//"
	"-//microsoft//dtd internet explorer 2.0 html strict//"
	"-//microsoft//dtd internet explorer 2.0 html//"
	"-//microsoft//dtd internet explorer 2.0 tables//"
	"-//microsoft//dtd internet explorer 3.0 html strict//"
	"-//microsoft//dtd internet explorer 3.0 html//"
	"-//microsoft//dtd internet explorer 3.0 tables//"
	"-//netscape comm. corp.//dtd html//"
	"-//netscape comm. corp.//dtd strict html//"
	"-//o'reilly and associates//dtd html 2.0//"
	"-//o'reilly and associates//dtd html extended 1.0//"
	"-//o'reilly and associates//dtd html extended relaxed 1.0//"
	"-//sq//dtd html 2.0 hotmetal + extensions//"
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
	"-//spyglass//dtd html 2.0 extended//"
	"-//sun microsystems corp.//dtd hotjava html//"
	"-//sun microsystems corp.//dtd hotjava strict html//"
	"-//w3c//dtd html 3 1995-03-24//"
	"-//w3c//dtd html 3.2 draft//"
	"-//w3c//dtd html 3.2 final//"
	"-//w3c//dtd html 3.2//"
	"-//w3c//dtd html 3.2s draft//"
	"-//w3c//dtd html 4.0 frameset//"
	"-//w3c//dtd html 4.0 transitional//"
	"-//w3c//dtd html experimental 19960712//"
	"-//w3c//dtd html experimental 970421//"
	"-//w3c//dtd w3 html//"
	"-//w3o//dtd w3 html 3.0//"
	"-//webtechs//dtd mozilla html 2.0//"
	"-//webtechs//dtd mozilla html//"
]

# These are the character references that don't need a terminating semicolon
# min length: 2, max: 6, none are a prefix of any other.
#
# Maps the reference name (without "&" or ";") to the character it stands for.
# The two invisible characters (nbsp, shy) are written as escapes so that they
# cannot be lost or altered by an editor or a transcoding step.
legacy_char_refs = {
	Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
	aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
	aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
	Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
	curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
	ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
	euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
	Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
	igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
	lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
	Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
	Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
	Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
	pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
	shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
	times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
	ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
	yen: '¥', yuml: 'ÿ'
}

# Element-name lists, by kind of element. Plain arrays of tag names.
# NOTE(review): none of these lists is read within this section — confirm
# which of them are still used.
void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
raw_text_elements = ['script', 'style']
escapable_raw_text_elements = ['textarea', 'title']
# http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
svg_elements = [
	'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
	'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
	'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
	'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
	'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
	'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
	'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
	'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
	'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
	'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
	'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
	'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
	'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
	'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
	'view', 'vkern'
]

# http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
# Plain array of element names; each name appears once ('mi' used to be
# listed twice).
mathml_elements = [
	'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
	'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
	'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
	'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
	'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
	'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
	'determinant', 'diff', 'divergence', 'divide', 'domain',
	'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
	'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
	'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
	'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
	'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
	'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
	'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
	'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
	'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
	'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
	'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
	'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
	'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
	'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
	'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
	'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
	'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
	'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
	'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
	'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
	'vectorproduct', 'xor'
]
# foreign_elements = [svg_elements..., mathml_elements...]
#normal_elements = All other allowed HTML elements are normal elements.

# Element name -> namespace of the elements in the "special" category.
# Lookups compare the stored namespace with the element's own namespace.
#
# NOTE: the key "title" appears twice below (once under HTML, once under
# SVG). An object can hold only one value per key, so only one of the two
# entries survives (in plain JavaScript semantics, the later SVG one).
special_elements = {
	# HTML:
	address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
	aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
	blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
	caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
	details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
	embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
	footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
	h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
	header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
	img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
	listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,

	menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these

	meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
	noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
	plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
	select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
	table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
	textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
	tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,

	# MathML:
	mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
	'annotation-xml':NS_MATHML,

	# SVG:
	foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
}

# Names of the formatting elements, as a lookup set (name -> true).
formatting_elements =
	a: true
	b: true
	big: true
	code: true
	em: true
	font: true
	i: true
	nobr: true
	s: true
	small: true
	strike: true
	strong: true
	tt: true
	u: true

# MathML text integration points: element name -> namespace.
mathml_text_integration =
	mi: NS_MATHML
	mo: NS_MATHML
	mn: NS_MATHML
	ms: NS_MATHML
	mtext: NS_MATHML
# true when el's name is in the table above and el is in that namespace
is_mathml_text_integration_point = (el) ->
	mathml_text_integration[el.name] is el.namespace
# True when el is an HTML integration point:
#   - a MathML annotation-xml element whose "encoding" attribute is
#     "text/html" or "application/xhtml+xml" (case-insensitive), or
#   - an SVG foreignObject, desc or title element.
# Reads el.attrs, which is why a token must not be passed.
is_html_integration = (el) -> # DON'T PASS A TOKEN
	switch el.namespace
		when NS_MATHML
			return false unless el.name is 'annotation-xml'
			return false unless el.attrs.encoding?
			return el.attrs.encoding.toLowerCase() in ['text/html', 'application/xhtml+xml']
		when NS_SVG
			return el.name in ['foreignObject', 'desc', 'title']
	return false

# Lookup tables of element name -> namespace. A lookup succeeds when the
# stored namespace equals the element's namespace.

# the six heading elements
h_tags =
	h1: NS_HTML
	h2: NS_HTML
	h3: NS_HTML
	h4: NS_HTML
	h5: NS_HTML
	h6: NS_HTML

foster_parenting_targets = { table: NS_HTML, tbody: NS_HTML, tfoot: NS_HTML, thead: NS_HTML, tr: NS_HTML }

end_tag_implied = {
	dd: NS_HTML, dt: NS_HTML, li: NS_HTML, option: NS_HTML, optgroup: NS_HTML,
	p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML, rtc: NS_HTML
}

# True when e belongs to the "special" category of elements.
# "title" is special in both the HTML and the SVG namespace, but
# special_elements can hold only one namespace per name (its literal lists
# "title" twice, so one entry is lost). That name is therefore handled
# explicitly here rather than through the table.
el_is_special = (e) ->
	if e.name is 'title'
		return e.namespace is NS_HTML or e.namespace is NS_SVG
	return special_elements[e.name] is e.namespace

# the special elements that el_is_special_not_adp() excludes
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
# True when el is special, but is not an HTML address, div or p element.
el_is_special_not_adp = (el) ->
	return el_is_special(el) and adp_els[el.name] isnt el.namespace

# All-lower-case SVG element name -> the same name in its mixed-case spelling.
# NOTE(review): the code that applies this table is outside this section —
# presumably used to restore case on SVG tag names; confirm.
svg_name_fixes = {
	altglyph: 'altGlyph'
	altglyphdef: 'altGlyphDef'
	altglyphitem: 'altGlyphItem'
	animatecolor: 'animateColor'
	animatemotion: 'animateMotion'
	animatetransform: 'animateTransform'
	clippath: 'clipPath'
	feblend: 'feBlend'
	fecolormatrix: 'feColorMatrix'
	fecomponenttransfer: 'feComponentTransfer'
	fecomposite: 'feComposite'
	feconvolvematrix: 'feConvolveMatrix'
	fediffuselighting: 'feDiffuseLighting'
	fedisplacementmap: 'feDisplacementMap'
	fedistantlight: 'feDistantLight'
	fedropshadow: 'feDropShadow'
	feflood: 'feFlood'
	fefunca: 'feFuncA'
	fefuncb: 'feFuncB'
	fefuncg: 'feFuncG'
	fefuncr: 'feFuncR'
	fegaussianblur: 'feGaussianBlur'
	feimage: 'feImage'
	femerge: 'feMerge'
	femergenode: 'feMergeNode'
	femorphology: 'feMorphology'
	feoffset: 'feOffset'
	fepointlight: 'fePointLight'
	fespecularlighting: 'feSpecularLighting'
	fespotlight: 'feSpotLight'
	fetile: 'feTile'
	feturbulence: 'feTurbulence'
	foreignobject: 'foreignObject'
	glyphref: 'glyphRef'
	lineargradient: 'linearGradient'
	radialgradient: 'radialGradient'
	textpath: 'textPath'
}
# All-lower-case SVG attribute name -> the same name in its mixed-case
# spelling. adjust_svg_attributes() renames a token's attrs_a entries with
# this table.
svg_attribute_fixes = {
	attributename: 'attributeName'
	attributetype: 'attributeType'
	basefrequency: 'baseFrequency'
	baseprofile: 'baseProfile'
	calcmode: 'calcMode'
	clippathunits: 'clipPathUnits'
	contentscripttype: 'contentScriptType'
	contentstyletype: 'contentStyleType'
	diffuseconstant: 'diffuseConstant'
	edgemode: 'edgeMode'
	externalresourcesrequired: 'externalResourcesRequired'
	# WHATWG removes this: filterres: 'filterRes'
	filterunits: 'filterUnits'
	glyphref: 'glyphRef'
	gradienttransform: 'gradientTransform'
	gradientunits: 'gradientUnits'
	kernelmatrix: 'kernelMatrix'
	kernelunitlength: 'kernelUnitLength'
	keypoints: 'keyPoints'
	keysplines: 'keySplines'
	keytimes: 'keyTimes'
	lengthadjust: 'lengthAdjust'
	limitingconeangle: 'limitingConeAngle'
	markerheight: 'markerHeight'
	markerunits: 'markerUnits'
	markerwidth: 'markerWidth'
	maskcontentunits: 'maskContentUnits'
	maskunits: 'maskUnits'
	numoctaves: 'numOctaves'
	pathlength: 'pathLength'
	patterncontentunits: 'patternContentUnits'
	patterntransform: 'patternTransform'
	patternunits: 'patternUnits'
	pointsatx: 'pointsAtX'
	pointsaty: 'pointsAtY'
	pointsatz: 'pointsAtZ'
	preservealpha: 'preserveAlpha'
	preserveaspectratio: 'preserveAspectRatio'
	primitiveunits: 'primitiveUnits'
	refx: 'refX'
	refy: 'refY'
	repeatcount: 'repeatCount'
	repeatdur: 'repeatDur'
	requiredextensions: 'requiredExtensions'
	requiredfeatures: 'requiredFeatures'
	specularconstant: 'specularConstant'
	specularexponent: 'specularExponent'
	spreadmethod: 'spreadMethod'
	startoffset: 'startOffset'
	stddeviation: 'stdDeviation'
	stitchtiles: 'stitchTiles'
	surfacescale: 'surfaceScale'
	systemlanguage: 'systemLanguage'
	tablevalues: 'tableValues'
	targetx: 'targetX'
	targety: 'targetY'
	textlength: 'textLength'
	viewbox: 'viewBox'
	viewtarget: 'viewTarget'
	xchannelselector: 'xChannelSelector'
	ychannelselector: 'yChannelSelector'
	zoomandpan: 'zoomAndPan'
}
# Attribute name as written in markup -> replacement name, in which the
# colon between prefix and local name becomes a space ('xmlns' maps to
# itself). adjust_foreign_attributes() renames a token's attrs_a entries with
# this table.
foreign_attr_fixes = {
	'xlink:actuate': 'xlink actuate'
	'xlink:arcrole': 'xlink arcrole'
	'xlink:href': 'xlink href'
	'xlink:role': 'xlink role'
	'xlink:show': 'xlink show'
	'xlink:title': 'xlink title'
	'xlink:type': 'xlink type'
	'xml:base': 'xml base'
	'xml:lang': 'xml lang'
	'xml:space': 'xml space'
	'xmlns': 'xmlns'
	'xmlns:xlink': 'xmlns xlink'
}
# Rename the attribute "definitionurl" to "definitionURL" in token t's
# in-progress attribute list (t.attrs_a, a list of [name, value] pairs).
# Changes t in place; returns nothing.
adjust_mathml_attributes = (t) ->
	for a in t.attrs_a when a[0] is 'definitionurl'
		a[0] = 'definitionURL'
	return
# Restore the mixed-case spelling of SVG attribute names in token t's
# in-progress attribute list (t.attrs_a, a list of [name, value] pairs),
# using svg_attribute_fixes. Changes t in place; returns nothing.
adjust_svg_attributes = (t) ->
	for a in t.attrs_a
		# Own-property test: a plain `table[name]?` lookup also finds names
		# inherited from Object.prototype, so an attribute called
		# "constructor" would have its name replaced by a function.
		if Object::hasOwnProperty.call svg_attribute_fixes, a[0]
			a[0] = svg_attribute_fixes[a[0]]
	return
# Rename the attributes listed in foreign_attr_fixes (xlink:*, xml:*, xmlns,
# xmlns:xlink) in token t's in-progress attribute list (t.attrs_a, a list of
# [name, value] pairs). Changes t in place; returns nothing.
adjust_foreign_attributes = (t) ->
	# fixfull
	for a in t.attrs_a
		# Own-property test: a plain `table[name]?` lookup also finds names
		# inherited from Object.prototype, so an attribute called
		# "constructor" would have its name replaced by a function.
		if Object::hasOwnProperty.call foreign_attr_fixes, a[0]
			a[0] = foreign_attr_fixes[a[0]]
	return

# decode_named_char_ref()
#
# The list of named character references is _huge_ so ask the browser to decode
# for us instead of wasting bandwidth/space on including the table here.
#
# Pass without the "&" but with the ";" examples:
#    for "&amp" pass "amp;"
#    for "&#x2032" pass "x2032;"
# state for decode_named_char_ref(): results already decoded, plus the
# textarea element that does the decoding
g_dncr =
	cache: {}
	textarea: document.createElement 'textarea'
# TODO test this in IE8
# Returns the decoded text, or null when the browser left the text unchanged.
decode_named_char_ref = (txt) ->
	txt = "&#{txt}"
	decoded = g_dncr.cache[txt]
	if decoded?
		return decoded
	# hand the reference to the browser as markup, then read it back as text
	g_dncr.textarea.innerHTML = txt
	decoded = g_dncr.textarea.value
	if decoded is txt
		return null
	# only successful decodes are remembered
	g_dncr.cache[txt] = decoded
	return decoded

parse_html = (args) ->
	txt = null
	cur = null # index of next char in txt to be parsed
	# declare doc and tokenizer variables so they're in scope below
	doc = null
	open_els = null # stack of open elements
	afe = null # active formatting elements
	template_ins_modes = null
	ins_mode = null
	original_ins_mode = null
	tok_state = null
	tok_cur_tag = null # partially parsed tag
	flag_scripting = null
	flag_frameset_ok = null
	flag_parsing = null
	flag_foster_parenting = null
	form_element_pointer = null
	temporary_buffer = null
	pending_table_character_tokens = null
	head_element_pointer = null
	flag_fragment_parsing = null
	context_element = null

	# clear the flag_parsing flag
	stop_parsing = -> flag_parsing = false

	# Report a parse error at the current position: hand `cur` to the
	# caller-supplied args.error_cb when there is one, otherwise log it.
	parse_error = ->
		return args.error_cb cur if args.error_cb?
		console.log "Parse error at character #{cur} of #{txt.length}"

	# http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
	# "Noah's Ark clause" but with three
	# Put new_el at the front of afe. Before that, walk the entries in front
	# of the first marker: when a third entry with the same name, namespace
	# and attributes as new_el is found, that entry is removed.
	afe_push = (new_el) ->
		# true when every attribute of a is on b with an identical value
		attrs_subset = (a, b) ->
			for k, v of a.attrs
				return false unless b.attrs[k] is v
			return true
		matches = 0
		for el, i in afe
			break if el.type is TYPE_AFE_MARKER
			continue unless el.name is new_el.name and el.namespace is new_el.namespace
			# attribute sets are equal when each is a subset of the other
			continue unless attrs_subset(el, new_el) and attrs_subset(new_el, el)
			matches += 1
			if matches is 3
				afe.splice i, 1
				break
		afe.unshift new_el
	# put a fresh marker at the front of afe
	afe_push_marker = -> afe.unshift new_afe_marker()

	# the functions below implement the Tree Construction algorithm
	# http://www.w3.org/TR/html5/syntax.html#tree-construction

	# But first... the helpers
	# true when an HTML template element is anywhere in open_els
	template_tag_is_open = ->
		for el in open_els when el.namespace is NS_HTML
			return true if el.name is 'template'
		return false
	# Walk open_els from index 0 looking for an element called tag_name (in
	# `namespace`, unless that is null). The walk fails as soon as it passes
	# an element listed in `scope` (a name -> namespace table).
	is_in_scope_x = (tag_name, scope, namespace) ->
		for el in open_els
			if el.name is tag_name
				return true if namespace is null or namespace is el.namespace
			return false if scope[el.name] is el.namespace
		return false
	# Same walk as is_in_scope_x, but an element listed in either `scope` or
	# `scope2` ends the search.
	is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
		for el in open_els
			if el.name is tag_name
				return true if namespace is null or namespace is el.namespace
			return false if scope[el.name] is el.namespace or scope2[el.name] is el.namespace
		return false
	# Scope boundary tables (element name -> namespace) for the is_in_*_scope
	# helpers.
	standard_scopers =
		# HTML
		applet: NS_HTML
		caption: NS_HTML
		html: NS_HTML
		table: NS_HTML
		td: NS_HTML
		th: NS_HTML
		marquee: NS_HTML
		object: NS_HTML
		template: NS_HTML
		# MathML
		mi: NS_MATHML
		mo: NS_MATHML
		mn: NS_MATHML
		ms: NS_MATHML
		mtext: NS_MATHML
		'annotation-xml': NS_MATHML
		# SVG
		foreignObject: NS_SVG
		desc: NS_SVG
		title: NS_SVG
	# extra boundaries, used on top of standard_scopers
	button_scopers = { button: NS_HTML }
	li_scopers = { ol: NS_HTML, ul: NS_HTML }
	# table scope uses this table alone
	table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
	# "has an element in ... scope" checks. Each looks for tag_name in
	# open_els; when namespace is given the element must be in it as well.
	is_in_scope = (tag_name, namespace = null) ->
		is_in_scope_x(tag_name, standard_scopers, namespace)
	is_in_button_scope = (tag_name, namespace = null) ->
		is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
	is_in_table_scope = (tag_name, namespace = null) ->
		is_in_scope_x(tag_name, table_scopers, namespace)
	# aka is_in_list_item_scope
	is_in_li_scope = (tag_name, namespace = null) ->
		is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
	# "has an element in select scope": look for tag_name in open_els (in
	# `namespace`, unless that is null), starting from index 0.
	# Select scope is made of every element type except HTML optgroup and
	# option, so any other element ends the search.
	is_in_select_scope = (tag_name, namespace = null) ->
		for t in open_els
			if t.name is tag_name and (namespace is null or namespace is t.namespace)
				return true
			# This used to be joined with "and" (non-HTML namespace AND not
			# named optgroup/option), which let every HTML element through.
			unless t.namespace is NS_HTML and (t.name is 'optgroup' or t.name is 'option')
				return false
		return false
	# Like is_in_scope(), but searches for one particular element object
	# (compared by identity) instead of a tag name. Boundary elements from
	# standard_scopers end the search; they must match by namespace too.
	el_is_in_scope = (needle) ->
		for el in open_els
			return true if el is needle
			return false if standard_scopers[el.name] is el.namespace
		return false

	# elements (name -> namespace) at which clear_stack_to_table_context stops
	clear_to_table_stopers = {
		table: NS_HTML
		template: NS_HTML
		html: NS_HTML
	}
	# Remove elements from the front of open_els until the element at index 0
	# is an HTML table, template or html element.
	# The namespace is compared (as clear_stack_to_table_body_context does):
	# a bare existence test also stopped at same-named non-HTML elements and
	# at an element called "constructor", which every object inherits.
	clear_stack_to_table_context = ->
		loop
			if clear_to_table_stopers[open_els[0].name] is open_els[0].namespace
				break
			open_els.shift()
		return
	# elements (name -> namespace) at which
	# clear_stack_to_table_body_context stops
	clear_to_table_body_stopers = { tbody: NS_HTML, tfoot: NS_HTML, thead: NS_HTML, template: NS_HTML, html: NS_HTML }
	# Remove elements from the front of open_els until the element at index 0
	# is an HTML tbody, tfoot, thead, template or html element.
	clear_stack_to_table_body_context = ->
		until clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
			open_els.shift()
		return
	# elements (name -> namespace) at which
	# clear_stack_to_table_row_context stops
	clear_to_table_row_stopers = {
		tr: NS_HTML
		template: NS_HTML
		html: NS_HTML
	}
	# Remove elements from the front of open_els until the element at index 0
	# is an HTML tr, template or html element.
	# The namespace is compared (as clear_stack_to_table_body_context does):
	# a bare existence test also stopped at same-named non-HTML elements and
	# at an element called "constructor", which every object inherits.
	clear_stack_to_table_row_context = ->
		loop
			if clear_to_table_row_stopers[open_els[0].name] is open_els[0].namespace
				break
			open_els.shift()
		return
	# Remove entries from the front of afe up to and including the first
	# marker. Running out of entries before a marker turns up is tolerated
	# (this happens in fragment case, ?spec error).
	clear_afe_to_marker = ->
		while afe.length > 0
			break if afe.shift().type is TYPE_AFE_MARKER
		return

	# 8.2.3.1 ...
	# http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
	# Choose ins_mode again from what is currently in open_els, walking from
	# the current node (index 0) towards the start of the stack. The numbered
	# comments are the steps of the spec algorithm linked above.
	reset_ins_mode = ->
		# 1. Let last be false.
		last = false
		# 2. Let node be the last node in the stack of open elements.
		node_i = 0
		node = open_els[node_i]
		# 3. Loop: If node is the first node in the stack of open elements,
		# then set last to true, and, if the parser was originally created as
		# part of the HTML fragment parsing algorithm (fragment case) set node
		# to the context element.
		loop
			if node_i is open_els.length - 1
				last = true
				# fragment case: decide based on the context element rather
				# than on the root of the stack
				if flag_fragment_parsing
					node = context_element

			# 4. If node is a select element, run these substeps:
			if node.name is 'select' and node.namespace is NS_HTML
				# 1. If last is true, jump to the step below labeled done.
				unless last
					# 2. Let ancestor be node.
					ancestor_i = node_i
					ancestor = node
					# 3. Loop: If ancestor is the first node in the stack of
					# open elements, jump to the step below labeled done.
					loop
						if ancestor_i is open_els.length - 1
							break
						# 4. Let ancestor be the node before ancestor in the stack
						# of open elements.
						ancestor_i += 1
						ancestor = open_els[ancestor_i]
						# 5. If ancestor is a template node, jump to the step below
						# labeled done.
						if ancestor.name is 'template' and ancestor.namespace is NS_HTML
							break
						# 6. If ancestor is a table node, switch the insertion mode
						# to "in select in table" and abort these steps.
						if ancestor.name is 'table' and ancestor.namespace is NS_HTML
							ins_mode = ins_mode_in_select_in_table
							return
						# 7. Jump back to the step labeled loop.
				# 8. Done: Switch the insertion mode to "in select" and abort
				# these steps.
				ins_mode = ins_mode_in_select
				return
			# 5. If node is a td or th element and last is false, then switch
			# the insertion mode to "in cell" and abort these steps.
			if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
				ins_mode = ins_mode_in_cell
				return
			# 6. If node is a tr element, then switch the insertion mode to "in
			# row" and abort these steps.
			if node.name is 'tr' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_row
				return
			# 7. If node is a tbody, thead, or tfoot element, then switch the
			# insertion mode to "in table body" and abort these steps.
			if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
				ins_mode = ins_mode_in_table_body
				return
			# 8. If node is a caption element, then switch the insertion mode
			# to "in caption" and abort these steps.
			if node.name is 'caption' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_caption
				return
			# 9. If node is a colgroup element, then switch the insertion mode
			# to "in column group" and abort these steps.
			if node.name is 'colgroup' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_column_group
				return
			# 10. If node is a table element, then switch the insertion mode to
			# "in table" and abort these steps.
			if node.name is 'table' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_table
				return
			# 11. If node is a template element, then switch the insertion mode
			# to the current template insertion mode and abort these steps.
			if node.name is 'template' and node.namespace is NS_HTML
				ins_mode = template_ins_modes[0]
				return
			# 12. If node is a head element and last is true, then switch the
			# insertion mode to "in body" ("in body"! not "in head"!) and abort
			# these steps. (fragment case)
			if node.name is 'head' and node.namespace is NS_HTML and last
				ins_mode = ins_mode_in_body
				return
			# 13. If node is a head element and last is false, then switch the
			# insertion mode to "in head" and abort these steps.
			if node.name is 'head' and node.namespace is NS_HTML and last is false
				ins_mode = ins_mode_in_head
				return
			# 14. If node is a body element, then switch the insertion mode to
			# "in body" and abort these steps.
			if node.name is 'body' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_body
				return
			# 15. If node is a frameset element, then switch the insertion mode
			# to "in frameset" and abort these steps. (fragment case)
			if node.name is 'frameset' and node.namespace is NS_HTML
				ins_mode = ins_mode_in_frameset
				return
			# 16. If node is an html element, run these substeps:
			if node.name is 'html' and node.namespace is NS_HTML
				# 1. If the head element pointer is null, switch the insertion
				# mode to "before head" and abort these steps. (fragment case)
				if head_element_pointer is null
					ins_mode = ins_mode_before_head
				else
					# 2. Otherwise, the head element pointer is not null,
					# switch the insertion mode to "after head" and abort these
					# steps.
					ins_mode = ins_mode_after_head
				return
			# 17. If last is true, then switch the insertion mode to "in body"
			# and abort these steps. (fragment case)
			if last
				ins_mode = ins_mode_in_body
				return
			# 18. Let node now be the node before node in the stack of open
			# elements.
			node_i += 1
			node = open_els[node_i]
			# 19. Return to the step labeled loop.

	# 8.2.3.2

	# http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
	adjusted_current_node = ->
		# When parsing a fragment and only one element is left on the stack of
		# open elements, the context element stands in for the current node.
		# In every other case this is simply the current node (open_els[0]).
		only_root_of_fragment = open_els.length is 1 and flag_fragment_parsing
		if only_root_of_fragment then context_element else open_els[0]

	# http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
	# this implementation is structured (mostly) as described at the link above.
	# capitalized comments are the "labels" described at the link above.
	reconstruct_afe = ->
		# Re-opens formatting elements that are listed in afe but are no longer
		# on the stack of open elements. Index 0 of afe is the newest entry.
		return if afe.length is 0
		newest_afe_entry = afe[0]
		# nothing to reconstruct if the newest entry is a marker or still open
		return if newest_afe_entry.type is TYPE_AFE_MARKER or newest_afe_entry in open_els
		# Rewind: step towards older entries until the next-older entry is a
		# marker or is still open (or until we run out of entries).
		rewind_i = 0
		oldest_afe_i = afe.length - 1
		while rewind_i < oldest_afe_i
			older_afe_entry = afe[rewind_i + 1]
			break if older_afe_entry.type is TYPE_AFE_MARKER or older_afe_entry in open_els
			rewind_i += 1
		# Create: walk back towards the newest entry, inserting a fresh element
		# for each one and swapping it into the list.
		loop
			afe[rewind_i] = insert_html_element afe[rewind_i].token
			break if rewind_i is 0
			rewind_i -= 1 # Advance

	# http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
	# adoption agency algorithm
	# overview here:
	#   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
	#   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
	#   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
	adoption_agency = (subject) ->
		# Runs the adoption agency algorithm for an end tag (or repeated start
		# tag) whose tag name is `subject`. Works directly on the shared
		# open_els and afe lists and on the parent/children links of the nodes
		# involved. Index 0 of open_els/afe is the "bottom"/"last" entry.
		debug_log "adoption_agency()"
		debug_log "tree: #{serialize_els doc.children, false, true}"
		debug_log "open_els: #{serialize_els open_els, true, true}"
		debug_log "afe: #{serialize_els afe, true, true}"
# this (disabled) block implements the W3C spec
#		# 1. If the current node is an HTML element whose tag name is subject,
#		# then run these substeps:
#		#
#		# 1. Let element be the current node.
#		#
#		# 2. Pop element off the stack of open elements.
#		#
#		# 3. If element is also in the list of active formatting elements,
#		# remove the element from the list.
#		#
#		# 4. Abort the adoption agency algorithm.
#		if open_els[0].name is subject and open_els[0].namespace is NS_HTML
#			el = open_els.shift()
#			# remove it from the list of active formatting elements (if found)
#			for t, i in afe
#				if t is el
#					afe.splice i, 1
#					break
#			debug_log "aaa: starting off with subject on top of stack, exiting"
#			return
# WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
		# If the current node is an HTML element whose tag name is subject, and
		# the current node is not in the list of active formatting elements,
		# then pop the current node off the stack of open elements, and abort
		# these steps.
		if open_els[0].name is subject and open_els[0].namespace is NS_HTML
			debug_log "aaa: starting off with subject on top of stack, exiting"
			# check whether the current node is in the list of active
			# formatting elements (nothing is removed from afe here)
			in_afe = false
			for el, i in afe
				if el is open_els[0]
					in_afe = true
					break
			unless in_afe
				debug_log "aaa: ...and not in afe, aaa done"
				open_els.shift()
				return
			# fall through
# END WHATWG
		# outer loop: give up once eight passes have been made
		outer = 0
		loop
			if outer >= 8
				return
			outer += 1
			# 5. Let formatting element be the last element in the list of
			# active formatting elements that: is between the end of the list
			# and the last scope marker in the list, if any, or the start of
			# the list otherwise, and  has the tag name subject.
			fe = null
			for t, fe_of_afe in afe
				if t.type is TYPE_AFE_MARKER
					break
				if t.name is subject
					fe = t
					break
			# If there is no such element, then abort these steps and instead
			# act as described in the "any other end tag" entry above.
			if fe is null
				debug_log "aaa: fe not found in afe"
				in_body_any_other_end_tag subject
				return
			# 6. If formatting element is not in the stack of open elements,
			# then this is a parse error; remove the element from the list, and
			# abort these steps.
			in_open_els = false
			for t, fe_of_open_els in open_els
				if t is fe
					in_open_els = true
					break
			unless in_open_els
				debug_log "aaa: fe not found in open_els"
				parse_error()
				# "remove it from the list" must mean afe, since it's not in open_els
				afe.splice fe_of_afe, 1
				return
			# 7. If formatting element is in the stack of open elements, but
			# the element is not in scope, then this is a parse error; abort
			# these steps.
			unless el_is_in_scope fe
				debug_log "aaa: fe not in scope"
				parse_error()
				return
			# 8. If formatting element is not the current node, this is a parse
			# error. (But do not abort these steps.)
			unless open_els[0] is fe
				parse_error()
				# continue
			# 9. Let furthest block be the topmost node in the stack of open
			# elements that is lower in the stack than formatting element, and
			# is an element in the special category. There might not be one.
			fb = null
			fb_of_open_els = null
			for t, i in open_els
				if t is fe
					break
				if el_is_special t
					fb = t
					fb_of_open_els = i
					# and continue, to see if there's one that's more "topmost"
			# 10. If there is no furthest block, then the UA must first pop all
			# the nodes from the bottom of the stack of open elements, from the
			# current node up to and including formatting element, then remove
			# formatting element from the list of active formatting elements,
			# and finally abort these steps.
			if fb is null
				debug_log "aaa: no fb"
				loop
					t = open_els.shift()
					if t is fe
						afe.splice fe_of_afe, 1
						return
			# 11. Let common ancestor be the element immediately above
			# formatting element in the stack of open elements.
			ca = open_els[fe_of_open_els + 1] # common ancestor

			node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
			# 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
			# (the bookmark is spliced in at fe's index, so fe_of_afe no longer
			# points at fe from here on; it is not used again below)
			bookmark = new_aaa_bookmark()
			for t, i in afe
				if t is fe
					afe.splice i, 0, bookmark
					break
			# 13. inner loop: walk up the stack from furthest block towards
			# formatting element
			node = last_node = fb
			inner = 0
			loop
				inner += 1
				# 3. Let node be the element immediately above node in the
				# stack of open elements, or if node is no longer in the stack
				# of open elements (e.g. because it got removed by this
				# algorithm), the element that was immediately above node in
				# the stack of open elements before node was removed.
				node_next = null
				for t, i in open_els
					if t is node
						node_next = open_els[i + 1]
						break
				node = node_next ? node_above
				debug_log "inner loop #{inner}"
				debug_log "tree: #{serialize_els doc.children, false, true}"
				debug_log "open_els: #{serialize_els open_els, true, true}"
				debug_log "afe: #{serialize_els afe, true, true}"
				debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
				debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
				debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
				debug_log "node: #{node.serialize true, true}"
				# TODO make sure node_above gets re-set if/when node is removed from open_els

				# 4. If node is formatting element, then go to the next step in
				# the overall algorithm.
				if node is fe
					break
				debug_log "the meat"
				# 5. If inner loop counter is greater than three and node is in
				# the list of active formatting elements, then remove node from
				# the list of active formatting elements.
				node_in_afe = false
				for t, i in afe
					if t is node
						if inner > 3
							afe.splice i, 1
							debug_log "max out inner"
						else
							node_in_afe = true
							debug_log "in afe"
						break
				# 6. If node is not in the list of active formatting elements,
				# then remove node from the stack of open elements and then go
				# back to the step labeled inner loop.
				unless node_in_afe
					debug_log "not in afe"
					for t, i in open_els
						if t is node
							# remember what was above node before dropping it
							node_above = open_els[i + 1]
							open_els.splice i, 1
							break
					continue
				debug_log "the bones"
				# 7. create an element for the token for which the element node
				# was created, in the HTML namespace, with common ancestor as
				# the intended parent; replace the entry for node in the list
				# of active formatting elements with an entry for the new
				# element, replace the entry for node in the stack of open
				# elements with an entry for the new element, and let node be
				# the new element.
				new_node = token_to_element node.token, NS_HTML, ca
				for t, i in afe
					if t is node
						afe[i] = new_node
						debug_log "replaced in afe"
						break
				for t, i in open_els
					if t is node
						node_above = open_els[i + 1]
						open_els[i] = new_node
						debug_log "replaced in open_els"
						break
				node = new_node
				# 8. If last node is furthest block, then move the
				# aforementioned bookmark to be immediately after the new node
				# in the list of active formatting elements.
				if last_node is fb
					for t, i in afe
						if t is bookmark
							afe.splice i, 1
							debug_log "removed bookmark"
							break
					for t, i in afe
						if t is node
							# "after" means lower
							afe.splice i, 0, bookmark # "after" means a lower index, so insert at node's index
							debug_log "placed bookmark after node"
							debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
							break
				# 9. Insert last node into node, first removing it from its
				# previous parent node if any.
				if last_node.parent?
					debug_log "last_node has parent"
					for c, i in last_node.parent.children
						if c is last_node
							debug_log "removing last_node from parent"
							last_node.parent.children.splice i, 1
							break
				node.children.push last_node
				last_node.parent = node
				# 10. Let last node be node.
				last_node = node
				debug_log "at last"
				# 11. Return to the step labeled inner loop.
			# 14. Insert whatever last node ended up being in the previous step
			# at the appropriate place for inserting a node, but using common
			# ancestor as the override target.

			# In the case where fe is immediately followed by fb:
			#   * inner loop exits out early (node==fe)
			#   * last_node is fb
			#   * last_node is still in the tree (not a duplicate)
			if last_node.parent?
				debug_log "FEFIRST? last_node has parent"
				for c, i in last_node.parent.children
					if c is last_node
						debug_log "removing last_node from parent"
						last_node.parent.children.splice i, 1
						break

			debug_log "after aaa inner loop"
			debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
			debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
			debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
			debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
			debug_log "tree: #{serialize_els doc.children, false, true}"

			debug_log "insert"


			# can't use the standard insert token thing, because it's already in
			# open_els and must stay at its current position in open_els
			dest = adjusted_insertion_location ca
			dest[0].children.splice dest[1], 0, last_node
			last_node.parent = dest[0]


			debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
			debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
			debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
			debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
			debug_log "tree: #{serialize_els doc.children, false, true}"

			# 15. Create an element for the token for which formatting element
			# was created, in the HTML namespace, with furthest block as the
			# intended parent.
			new_element = token_to_element fe.token, NS_HTML, fb
			# 16. Take all of the child nodes of furthest block and append them
			# to the element created in the last step.
			while fb.children.length
				t = fb.children.shift()
				t.parent = new_element
				new_element.children.push t
			# 17. Append that new element to furthest block.
			new_element.parent = fb
			fb.children.push new_element
			# 18. Remove formatting element from the list of active formatting
			# elements, and insert the new element into the list of active
			# formatting elements at the position of the aforementioned
			# bookmark.
			for t, i in afe
				if t is fe
					afe.splice i, 1
					break
			for t, i in afe
				if t is bookmark
					# the bookmark entry itself is overwritten by the new element
					afe[i] = new_element
					break
			# 19. Remove formatting element from the stack of open elements,
			# and insert the new element into the stack of open elements
			# immediately below the position of furthest block in that stack.
			for t, i in open_els
				if t is fe
					open_els.splice i, 1
					break
			for t, i in open_els
				if t is fb
					# inserting at fb's index puts new_element just below fb
					open_els.splice i, 0, new_element
					break
			# 20. Jump back to the step labeled outer loop.
			debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
			debug_log "tree: #{serialize_els doc.children, false, true}"
			debug_log "open_els: #{serialize_els open_els, true, true}"
			debug_log "afe: #{serialize_els afe, true, true}"
		debug_log "AAA DONE"

	# http://www.w3.org/TR/html5/syntax.html#close-a-p-element
	close_p_element = ->
		# Closes the nearest open HTML <p>: implied end tags are generated
		# first (never for 'p' itself), a parse error is reported when the
		# current node is then not a <p>, and elements are popped up to and
		# including the <p>.
		generate_implied_end_tags 'p' # arg is exception
		top_of_stack_el = open_els[0]
		parse_error() unless top_of_stack_el.name is 'p' and top_of_stack_el.namespace is NS_HTML
		# the length check keeps the root element on the stack, just in case
		while open_els.length > 1
			popped_el = open_els.shift()
			return if popped_el.name is 'p' and popped_el.namespace is NS_HTML
	close_p_if_in_button_scope = ->
		# Convenience wrapper: close the <p> only when one is in button scope.
		close_p_element() if is_in_button_scope 'p', NS_HTML

	# http://www.w3.org/TR/html5/syntax.html#insert-a-character
	# aka insert_a_character = (t) ->
	insert_character = (t) ->
		# Adds the text token `t` at the adjusted insertion location. When the
		# node just before that location is already a text node, the new text
		# is appended to it instead of inserting a second text node.
		[ail_node, ail_index] = adjusted_insertion_location()
		# fixfull check for Document node
		ail_siblings = ail_node.children
		if ail_index > 0
			preceding_sibling = ail_siblings[ail_index - 1]
			if preceding_sibling.type is TYPE_TEXT
				preceding_sibling.text += t.text
				return
		ail_siblings.splice ail_index, 0, t


	# 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
	process_token = (t) ->
		# Tree construction dispatcher: decides whether token `t` is handled by
		# the current insertion mode or by the foreign content rules, based on
		# the adjusted current node. The conditions are checked in order and
		# the first that matches wins.
		acn = adjusted_current_node()
		handled_by_ins_mode = switch
			# empty stack of open elements
			when not acn? then true
			when acn.namespace is NS_HTML then true
			# MathML text integration point: text, or any start tag other
			# than mglyph/malignmark
			when is_mathml_text_integration_point(acn) and ((t.type is TYPE_START_TAG and t.name isnt 'mglyph' and t.name isnt 'malignmark') or t.type is TYPE_TEXT) then true
			# <svg> start tag directly inside MathML annotation-xml
			when acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg' then true
			# HTML integration point: start tags and text
			when is_html_integration(acn) and (t.type is TYPE_START_TAG or t.type is TYPE_TEXT) then true
			# end of file always goes to the insertion mode
			else t.type is TYPE_EOF
		if handled_by_ins_mode
			ins_mode t
		else
			in_foreign_content t
		return

	# 8.2.5.1
	# http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
	# http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
	adjusted_insertion_location = (override_target = null) ->
		# Computes the "appropriate place for inserting a node".
		#
		# override_target: optional node to start from instead of the current
		# node (open_els[0]).
		#
		# Returns a two-element array [node, index]: the new node belongs in
		# node.children at position index.

		# 1. If there was an override target specified, then let target be the
		# override target.
		if override_target?
			target = override_target
		else # Otherwise, let target be the current node.
			target = open_els[0]
		# 2. Determine the adjusted insertion location using the first matching
		# steps from the following list:
		#
		# If foster parenting is enabled and target is a table, tbody, tfoot,
		# thead, or tr element Foster parenting happens when content is
		# misnested in tables.
		if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
			loop # once. this is here so we can ``break`` to "abort these substeps"
				# 1. Let last template be the last template element in the
				# stack of open elements, if any.
				# ("last" is the most recently added, so the first match when
				# scanning from index 0)
				last_template = null
				last_template_i = null
				for el, i in open_els
					if el.name is 'template' and el.namespace is NS_HTML
						last_template = el
						last_template_i = i
						break
				# 2. Let last table be the last table element in the stack of
				# open elements, if any.
				last_table = null
				last_table_i = null
				for el, i in open_els
					if el.name is 'table' and el.namespace is NS_HTML
						last_table = el
						last_table_i = i
						break
				# 3. If there is a last template and either there is no last
				# table, or there is one, but last template is lower (more
				# recently added) than last table in the stack of open
				# elements, then: let adjusted insertion location be inside
				# last template's template contents, after its last child (if
				# any), and abort these substeps.
				if last_template and (last_table is null or last_template_i < last_table_i)
					target = last_template # fixfull should be its contents
					target_i = target.children.length
					break
				# 4. If there is no last table, then let adjusted insertion
				# location be inside the first element in the stack of open
				# elements (the html element), after its last child (if any),
				# and abort these substeps. (fragment case)
				if last_table is null
					# "first element in the stack" is the topmost one, which
					# lives at the highest index
					target = open_els[open_els.length - 1]
					target_i = target.children.length
					break
				# 5. If last table has a parent element, then let adjusted
				# insertion location be inside last table's parent element,
				# immediately before last table, and abort these substeps.
				if last_table.parent?
					for c, i in last_table.parent.children
						if c is last_table
							target = last_table.parent
							target_i = i
							break
					break
				# 6. Let previous element be the element immediately above last
				# table in the stack of open elements.
				#
				# huh? how could it not have a parent?
				previous_element = open_els[last_table_i + 1]
				# 7. Let adjusted insertion location be inside previous
				# element, after its last child (if any).
				target = previous_element
				target_i = target.children.length
				# Note: These steps are involved in part because it's possible
				# for elements, the table element in this case in particular,
				# to have been moved by a script around in the DOM, or indeed
				# removed from the DOM entirely, after the element was inserted
				# by the parser.
				break # don't really loop
		else
			# Otherwise Let adjusted insertion location be inside target, after
			# its last child (if any).
			target_i = target.children.length

		# 3. If the adjusted insertion location is inside a template element,
		# let it instead be inside the template element's template contents,
		# after its last child (if any).
		# fixfull (template)

		# 4. Return the adjusted insertion location.
		return [target, target_i]

	# http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
	# aka create_an_element_for_token
	token_to_element = (t, namespace, intended_parent) ->
		# Builds a new element Node from start-tag token `t` in `namespace`.
		# The token is kept on the node (as `token`). `intended_parent` is
		# accepted to mirror the spec's signature but is not used here, and the
		# new node is not attached to anything.

		# turn the token's [name, value] pairs into a name -> value hash; a
		# pair later in attrs_a overwrites an earlier one with the same name
		# TODO check what to do with duplicate attrs
		attr_by_name = {}
		attr_by_name[attr_pair[0]] = attr_pair[1] for attr_pair in t.attrs_a

		# TODO 2. If the newly created element has an xmlns attribute in the
		# XMLNS namespace whose value is not exactly the same as the element's
		# namespace, that is a parse error. Similarly, if the newly created
		# element has an xmlns:xlink attribute in the XMLNS namespace whose
		# value is not the XLink Namespace, that is a parse error.

		# fixfull: the spec says stuff about form pointers and ownerDocument

		return new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attr_by_name, token: t

	# http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
	insert_foreign_element = (token, namespace) ->
		# Creates an element for `token` in `namespace`, links it into the tree
		# at the adjusted insertion location, makes it the current node, and
		# returns it.
		[ail_node, ail_index] = adjusted_insertion_location()
		inserted_el = token_to_element token, namespace, ail_node
		# TODO skip this next step if it's broken (eg ail_node is document with child already)
		inserted_el.parent = ail_node
		ail_node.children.splice ail_index, 0, inserted_el
		open_els.unshift inserted_el
		inserted_el
	# http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
	insert_html_element = (token) ->
		# Same as insert_foreign_element, fixed to the HTML namespace.
		return insert_foreign_element(token, NS_HTML)

	# http://www.w3.org/TR/html5/syntax.html#insert-a-comment
	# position should be [node, index_within_children]
	insert_comment = (t, position = null) ->
		# Splices comment token `t` into the tree. `position`, when given, is
		# [node, index_within_children]; otherwise the adjusted insertion
		# location is used.
		[comment_parent, comment_index] = position ? adjusted_insertion_location()
		comment_parent.children.splice comment_index, 0, t

	# 8.2.5.2
	# http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
	parse_generic_raw_text = (t) ->
		# Generic raw text element parsing: insert the element for `t`,
		# remember the insertion mode to come back to, put the tokenizer in the
		# RAWTEXT state and switch to the "text" insertion mode.
		insert_html_element t
		original_ins_mode = ins_mode
		tok_state = tok_state_rawtext
		ins_mode = ins_mode_text
	parse_generic_rcdata_text = (t) ->
		# Generic RCDATA element parsing: insert the element for `t`, remember
		# the insertion mode to come back to, put the tokenizer in the RCDATA
		# state and switch to the "text" insertion mode.
		insert_html_element t
		original_ins_mode = ins_mode
		tok_state = tok_state_rcdata
		ins_mode = ins_mode_text

	# 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
	# http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
	generate_implied_end_tags = (except = null) ->
		# Pops the current node for as long as it is listed in end_tag_implied
		# (with a matching namespace) and its name is not `except`.
		loop
			implied_candidate = open_els[0]
			break unless end_tag_implied[implied_candidate.name] is implied_candidate.namespace
			break if implied_candidate.name is except
			open_els.shift()

	# 8.2.5.4 The rules for parsing tokens in HTML content
	# http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml

	# 8.2.5.4.1 The "initial" insertion mode
	# http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
	is_quirks_yes_doctype = (t) ->
		# Returns true when DOCTYPE token `t` should put the document into
		# (full) quirks mode, false otherwise. Identifier comparisons are
		# case-insensitive.
		return true if t.flag 'force-quirks'
		return true unless t.name is 'html'
		has_public_id = t.public_identifier?
		if has_public_id
			lc_public_id = t.public_identifier.toLowerCase()
			# any of the known quirky public identifier prefixes
			for quirky_prefix in quirks_yes_pi_prefixes
				return true if lc_public_id[0...quirky_prefix.length] is quirky_prefix
			# public identifiers that must match in full
			return true if lc_public_id in ['-//w3o//dtd w3 html strict 3.0//en//', '-/w3c/dtd html 4.0 transitional/en', 'html']
		if t.system_identifier?
			return t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
		# from here on the system identifier is missing
		if has_public_id
			return lc_public_id[0...32] is '-//w3c//dtd html 4.01 frameset//' or lc_public_id[0...36] is '-//w3c//dtd html 4.01 transitional//'
		return false
	is_quirks_limited_doctype = (t) ->
		# Returns true when DOCTYPE token `t` should put the document into
		# limited-quirks mode, false otherwise. Only tokens with a public
		# identifier can qualify; the comparison is case-insensitive.
		return false unless t.public_identifier?
		lc_public_id = t.public_identifier.toLowerCase()
		public_id_begins_with = (wanted_prefix) ->
			lc_public_id[0...wanted_prefix.length] is wanted_prefix
		# XHTML 1.0 Frameset/Transitional qualify regardless of system identifier
		if public_id_begins_with('-//w3c//dtd xhtml 1.0 frameset//') or public_id_begins_with('-//w3c//dtd xhtml 1.0 transitional//')
			return true
		# HTML 4.01 Frameset/Transitional qualify only with a system identifier
		if t.system_identifier? and (public_id_begins_with('-//w3c//dtd html 4.01 frameset//') or public_id_begins_with('-//w3c//dtd html 4.01 transitional//'))
			return true
		return false
	ins_mode_initial = (t) ->
		# The "initial" insertion mode. Whitespace is dropped, comments and
		# doctypes are appended to the document, and anything else forces
		# quirks mode and is reprocessed in "before html".
		return if is_space_tok t
		switch t.type
			when TYPE_COMMENT
				# ?fixfull
				doc.children.push t
			when TYPE_DOCTYPE
				# fixfull syntax error from first paragraph and following bullets
				# fixfull set doc.doctype
				# fixfull is the "not an iframe srcdoc" thing relevant?
				if is_quirks_yes_doctype t
					doc.flag 'quirks mode', QUIRKS_YES
				else if is_quirks_limited_doctype t
					doc.flag 'quirks mode', QUIRKS_LIMITED
				doc.children.push t
				ins_mode = ins_mode_before_html
			else
				# Anything else
				# fixfull not iframe srcdoc?
				parse_error()
				doc.flag 'quirks mode', QUIRKS_YES
				ins_mode = ins_mode_before_html
				process_token t
		return

	# 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
	ins_mode_before_html = (t) ->
		# The "before html" insertion mode: handles token `t` until the root
		# html element exists. Either consumes the token, or creates the root
		# element, switches to "before head" and (for an implied root)
		# reprocesses the token.
		if t.type is TYPE_DOCTYPE
			parse_error()
			return
		if t.type is TYPE_COMMENT
			doc.children.push t
			return
		if is_space_tok t
			return
		if t.type is TYPE_START_TAG and t.name is 'html'
			el = token_to_element t, NS_HTML, doc
			doc.children.push el
			# link the root back to the document, exactly as is done for the
			# implied root element below
			el.parent = doc
			open_els.unshift el
			# fixfull (big paragraph in spec about manifest, fragment, urls, etc)
			ins_mode = ins_mode_before_head
			return
		# only </head>, </body>, </html> and </br> fall through to "anything
		# else"; every other end tag is a parse error and is ignored
		if t.type is TYPE_END_TAG and t.name not in ['head', 'body', 'html', 'br']
			parse_error()
			return
		# Anything else: create an implied html element and reprocess
		el = token_to_element new_open_tag('html'), NS_HTML, doc
		doc.children.push el
		el.parent = doc
		open_els.unshift el
		# ?fixfull browsing context
		ins_mode = ins_mode_before_head
		process_token t
		return

	# 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
	ins_mode_before_head = (t) ->
		# The "before head" insertion mode: consumes whitespace, comments and
		# doctypes, opens the head element for an explicit <head>, and
		# otherwise opens an implied head and reprocesses the token.
		return if is_space_tok t
		switch t.type
			when TYPE_COMMENT
				insert_comment t
				return
			when TYPE_DOCTYPE
				parse_error()
				return
			when TYPE_START_TAG
				if t.name is 'html'
					ins_mode_in_body t
					return
				if t.name is 'head'
					head_element_pointer = insert_html_element t
					ins_mode = ins_mode_in_head
					return
			when TYPE_END_TAG
				# only these end tags fall through to "Anything else" below
				unless t.name in ['head', 'body', 'html', 'br']
					parse_error()
					return
		# Anything else
		head_element_pointer = insert_html_element new_open_tag 'head'
		ins_mode = ins_mode_in_head
		process_token t

	# 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
	ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
		# "Anything else" for "in head": leave the head and reprocess `t` in
		# the "after head" insertion mode.
		ins_mode = ins_mode_after_head
		open_els.shift() # spec says this will be a 'head' node
		return process_token(t)
	ins_mode_in_head = (t) ->
		# The "in head" insertion mode: handles token `t` while the head
		# element is open. Also called from other insertion modes for tokens
		# that are to be processed "using the rules for in head".
		if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
			insert_character t
			return
		if t.type is TYPE_COMMENT
			insert_comment t
			return
		if t.type is TYPE_DOCTYPE
			parse_error()
			return
		if t.type is TYPE_START_TAG and t.name is 'html'
			ins_mode_in_body t
			return
		if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
			# void elements: insert, then immediately pop again
			el = insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			return
		if t.type is TYPE_START_TAG and t.name is 'meta'
			el = insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			# fixfull encoding stuff
			return
		if t.type is TYPE_START_TAG and t.name is 'title'
			parse_generic_rcdata_text t
			return
		if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
			parse_generic_raw_text t
			return
		if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
			insert_html_element t
			ins_mode = ins_mode_in_head_noscript
			return
		if t.type is TYPE_START_TAG and t.name is 'script'
			# ail is [node, index_within_children]
			ail = adjusted_insertion_location()
			# the intended parent is the node, not the [node, index] pair
			el = token_to_element t, NS_HTML, ail[0]
			el.flag 'parser-inserted', true
			# fixfull fragment case
			# link both ways, the same way insert_foreign_element does
			el.parent = ail[0]
			ail[0].children.splice ail[1], 0, el
			open_els.unshift el
			tok_state = tok_state_script_data
			original_ins_mode = ins_mode # make sure orig... is defined
			ins_mode = ins_mode_text
			return
		if t.type is TYPE_END_TAG and t.name is 'head'
			open_els.shift() # will be a head element... spec says so
			ins_mode = ins_mode_after_head
			return
		if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
			ins_mode_in_head_else t
			return
		if t.type is TYPE_START_TAG and t.name is 'template'
			insert_html_element t
			afe_push_marker()
			flag_frameset_ok = false
			ins_mode = ins_mode_in_template
			template_ins_modes.unshift ins_mode_in_template
			return
		if t.type is TYPE_END_TAG and t.name is 'template'
			if template_tag_is_open()
				# parentheses are required: without them this is only a
				# reference to the function and nothing gets popped
				generate_implied_end_tags()
				if open_els[0].name isnt 'template'
					parse_error()
				# pop up to and including the template element
				loop
					el = open_els.shift()
					if el.name is 'template' and el.namespace is NS_HTML
						break
				clear_afe_to_marker()
				template_ins_modes.shift()
				reset_ins_mode()
			else
				parse_error()
			return
		if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
			parse_error()
			return
		# Anything else
		ins_mode_in_head_else t

	# 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
	ins_mode_in_head_noscript_else = (t) ->
		# "Anything else" for "in head noscript": report a parse error, pop
		# the current node, and reprocess `t` in the "in head" insertion mode.
		parse_error()
		ins_mode = ins_mode_in_head
		open_els.shift()
		return process_token(t)
	ins_mode_in_head_noscript = (t) ->
		# The "in head noscript" insertion mode: handles token `t` while a
		# <noscript> inside the head is open. The first matching rule wins.
		tok_is_start_tag = t.type is TYPE_START_TAG
		tok_is_end_tag = t.type is TYPE_END_TAG
		if t.type is TYPE_DOCTYPE
			parse_error()
		else if tok_is_start_tag and t.name is 'html'
			ins_mode_in_body t
		else if tok_is_end_tag and t.name is 'noscript'
			open_els.shift()
			ins_mode = ins_mode_in_head
		else if is_space_tok(t) or t.type is TYPE_COMMENT or (tok_is_start_tag and t.name in ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style'])
			# these are processed exactly as they would be in "in head"
			ins_mode_in_head t
		else if tok_is_end_tag and t.name is 'br'
			ins_mode_in_head_noscript_else t
		else if (tok_is_start_tag and t.name in ['head', 'noscript']) or tok_is_end_tag
			parse_error()
		else
			# Anything else
			ins_mode_in_head_noscript_else t
		return


	# 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
	ins_mode_after_head_else = (t) ->
		# "Anything else" for "after head": open an implied <body>, switch to
		# "in body" and reprocess `t`.
		insert_html_element new_open_tag 'body'
		ins_mode = ins_mode_in_body
		process_token t
		return
	ins_mode_after_head = (t) ->
		# The "after head" insertion mode: handles token `t` between the end of
		# the head and the start of the body/frameset. The first matching rule
		# wins.
		tok_is_start_tag = t.type is TYPE_START_TAG
		tok_is_end_tag = t.type is TYPE_END_TAG
		if is_space_tok t
			insert_character t
		else if t.type is TYPE_COMMENT
			insert_comment t
		else if t.type is TYPE_DOCTYPE
			parse_error()
		else if tok_is_start_tag and t.name is 'html'
			ins_mode_in_body t
		else if tok_is_start_tag and t.name is 'body'
			insert_html_element t
			flag_frameset_ok = false
			ins_mode = ins_mode_in_body
		else if tok_is_start_tag and t.name is 'frameset'
			insert_html_element t
			ins_mode = ins_mode_in_frameset
		else if tok_is_start_tag and t.name in ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title']
			# head content after </head>: temporarily put the head element
			# back on the stack, process the token as "in head", then take
			# the head element off the stack again (wherever it ended up)
			parse_error()
			open_els.unshift head_element_pointer
			ins_mode_in_head t
			head_pos_in_open_els = open_els.indexOf head_element_pointer
			if head_pos_in_open_els is -1
				console.log "warning: 23904 couldn't find head element in open_els"
			else
				open_els.splice head_pos_in_open_els, 1
		else if tok_is_end_tag and t.name is 'template'
			ins_mode_in_head t
		else if tok_is_end_tag and t.name in ['body', 'html', 'br']
			ins_mode_after_head_else t
		else if (tok_is_start_tag and t.name is 'head') or tok_is_end_tag
			parse_error()
		else
			# Anything else
			ins_mode_after_head_else t
		return

	# 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
	in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
		# The "any other end tag" steps of the "in body" insertion mode.
		#
		# name: tag name of the end tag being processed.
		#
		# Walks the stack of open elements from the current node (index 0)
		# upwards. At the first HTML element called `name`, everything up to
		# and including it is popped. If a special element is reached first,
		# the end tag is a parse error and is ignored. Returns nothing.
		#
		# The stack is walked by index rather than by searching for the
		# previous node again on every step, and running past the top of the
		# stack simply ends the walk.
		for node in open_els
			if node.name is name and node.namespace is NS_HTML
				generate_implied_end_tags name # arg is exception
				unless node is open_els[0]
					parse_error()
				# pop up to and including node; the length check keeps this
				# from spinning forever should node not be on the stack
				while open_els.length > 0
					el = open_els.shift()
					if el is node
						return
				return
			if special_elements[node.name] is node.namespace
				parse_error()
				return
		return
	# Tree-construction handler for the "in body" insertion mode.
	#
	# t: the token to process. The code below reads t.type, t.name, t.text and
	#    t.attrs_a, and calls t.flag() / t.acknowledge_self_closing().
	#
	# The token is tested against a long chain of cases (in spec order); the
	# first matching case handles it and returns. Cases manipulate open_els
	# (index 0 is the current node), the afe list, flag_frameset_ok, ins_mode,
	# tok_state and form_element_pointer.
	ins_mode_in_body = (t) ->
		if t.type is TYPE_TEXT and t.text is "\u0000"
			parse_error()
			return
		if is_space_tok t
			reconstruct_afe()
			insert_character t
			return
		if t.type is TYPE_TEXT
			reconstruct_afe()
			insert_character t
			flag_frameset_ok = false
			return
		if t.type is TYPE_COMMENT
			insert_comment t
			return
		if t.type is TYPE_DOCTYPE
			parse_error()
			return
		if t.type is TYPE_START_TAG and t.name is 'html'
			parse_error()
			return if template_tag_is_open()
			# copy attributes that the root element (last in open_els) lacks
			root_attrs = open_els[open_els.length - 1].attrs
			for a in t.attrs_a
				root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
			return

		if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
			ins_mode_in_head t
			return
		if t.type is TYPE_START_TAG and t.name is 'body'
			parse_error()
			return if open_els.length < 2
			second = open_els[open_els.length - 2]
			return unless second.namespace is NS_HTML
			return unless second.name is 'body'
			return if template_tag_is_open()
			flag_frameset_ok = false
			for a in t.attrs_a
				second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
			return
		if t.type is TYPE_START_TAG and t.name is 'frameset'
			parse_error()
			return if open_els.length < 2
			second_i = open_els.length - 2
			second = open_els[second_i]
			return unless second.namespace is NS_HTML
			return unless second.name is 'body'
			if flag_frameset_ok is false
				return
			# detach the body element from its parent
			if second.parent?
				for el, i in second.parent.children
					if el is second
						second.parent.children.splice i, 1
						break
			open_els.splice second_i, 1
			# pop everything except the "root html element"
			while open_els.length > 1
				open_els.shift()
			insert_html_element t
			ins_mode = ins_mode_in_frameset
			return
		if t.type is TYPE_EOF
			ok_tags = {
				dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
				td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
				tr:NS_HTML, body:NS_HTML, html:NS_HTML,
			}
			# parse error if any open element is not one of the ok_tags
			for el in open_els
				unless ok_tags[el.name] is el.namespace
					parse_error()
					break
			if template_ins_modes.length > 0
				ins_mode_in_template t
			else
				stop_parsing()
			return
		if t.type is TYPE_END_TAG and t.name is 'body'
			unless is_in_scope 'body', NS_HTML
				parse_error()
				return
			ok_tags = {
				dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
				option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
				rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
				th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
				html:NS_HTML
			}
			# parse error if any open element is not one of the ok_tags
			for el in open_els
				unless ok_tags[el.name] is el.namespace
					parse_error()
					break
			ins_mode = ins_mode_after_body
			return
		if t.type is TYPE_END_TAG and t.name is 'html'
			unless is_in_scope 'body', NS_HTML
				parse_error()
				return
			ok_tags = {
				dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
				option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
				rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
				th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
				html:NS_HTML
			}
			# parse error if any open element is not one of the ok_tags
			for el in open_els
				unless ok_tags[el.name] is el.namespace
					parse_error()
					break
			ins_mode = ins_mode_after_body
			process_token t
			return
		if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
			close_p_if_in_button_scope()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and h_tags[t.name]?
			close_p_if_in_button_scope()
			if h_tags[open_els[0].name] is open_els[0].namespace
				parse_error()
				open_els.shift()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
			close_p_if_in_button_scope()
			insert_html_element t
			eat_next_token_if_newline()
			flag_frameset_ok = false
			return
		if t.type is TYPE_START_TAG and t.name is 'form'
			unless form_element_pointer is null or template_tag_is_open()
				parse_error()
				return
			close_p_if_in_button_scope()
			el = insert_html_element t
			unless template_tag_is_open()
				form_element_pointer = el
			return
		if t.type is TYPE_START_TAG and t.name is 'li'
			flag_frameset_ok = false
			for node in open_els
				if node.name is 'li' and node.namespace is NS_HTML
					generate_implied_end_tags 'li' # arg is exception
					if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
						parse_error()
					loop
						el = open_els.shift()
						if el.name is 'li' and el.namespace is NS_HTML
							break
					break
				if el_is_special_not_adp node
					break
			close_p_if_in_button_scope()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
			flag_frameset_ok = false
			for node in open_els
				if node.name is 'dd' and node.namespace is NS_HTML
					generate_implied_end_tags 'dd' # arg is exception
					if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
						parse_error()
					loop
						el = open_els.shift()
						if el.name is 'dd' and el.namespace is NS_HTML
							break
					break
				if node.name is 'dt' and node.namespace is NS_HTML
					generate_implied_end_tags 'dt' # arg is exception
					if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
						parse_error()
					loop
						el = open_els.shift()
						if el.name is 'dt' and el.namespace is NS_HTML
							break
					break
				if el_is_special_not_adp node
					break
			close_p_if_in_button_scope()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and t.name is 'plaintext'
			close_p_if_in_button_scope()
			insert_html_element t
			tok_state = tok_state_plaintext
			return
		if t.type is TYPE_START_TAG and t.name is 'button'
			if is_in_scope 'button', NS_HTML
				parse_error()
				generate_implied_end_tags()
				loop
					el = open_els.shift()
					if el.name is 'button' and el.namespace is NS_HTML
						break
			reconstruct_afe()
			insert_html_element t
			flag_frameset_ok = false
			return
		if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
			unless is_in_scope t.name, NS_HTML
				parse_error()
				return
			generate_implied_end_tags()
			unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
				parse_error()
			loop
				el = open_els.shift()
				if el.name is t.name and el.namespace is NS_HTML
					return
			return
		if t.type is TYPE_END_TAG and t.name is 'form'
			unless template_tag_is_open()
				node = form_element_pointer
				form_element_pointer = null
				if node is null or not el_is_in_scope node
					parse_error()
					return
				generate_implied_end_tags()
				if open_els[0] isnt node
					parse_error()
				# remove just the form element, wherever it is in the stack
				for el, i in open_els
					if el is node
						open_els.splice i, 1
						break
			else
				unless is_in_scope 'form', NS_HTML
					parse_error()
					return
				generate_implied_end_tags()
				if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
					parse_error()
				loop
					el = open_els.shift()
					if el.name is 'form' and el.namespace is NS_HTML
						break
			return
		if t.type is TYPE_END_TAG and t.name is 'p'
			unless is_in_button_scope 'p', NS_HTML
				parse_error()
				insert_html_element new_open_tag 'p'
			close_p_element()
			return
		if t.type is TYPE_END_TAG and t.name is 'li'
			unless is_in_li_scope 'li', NS_HTML
				parse_error()
				return
			generate_implied_end_tags 'li' # arg is exception
			if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
				parse_error()
			loop
				el = open_els.shift()
				if el.name is 'li' and el.namespace is NS_HTML
					break
			return
		if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
			unless is_in_scope t.name, NS_HTML
				parse_error()
				return
			generate_implied_end_tags t.name # arg is exception
			if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
				parse_error()
			loop
				el = open_els.shift()
				if el.name is t.name and el.namespace is NS_HTML
					break
			return
		if t.type is TYPE_END_TAG and h_tags[t.name]?
			# is any heading element (not just t.name) in scope?
			h_in_scope = false
			for el in open_els
				if h_tags[el.name] is el.namespace
					h_in_scope = true
					break
				if standard_scopers[el.name] is el.namespace
					break
			unless h_in_scope
				parse_error()
				return
			generate_implied_end_tags()
			if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
				parse_error()
			loop
				el = open_els.shift()
				if h_tags[el.name] is el.namespace
					break
			return
		# deep breath!
		if t.type is TYPE_START_TAG and t.name is 'a'
			# If the list of active formatting elements contains an a element
			# between the end of the list and the last marker on the list (or
			# the start of the list if there is no marker on the list), then
			# this is a parse error; run the adoption agency algorithm for the
			# tag name "a", then remove that element from the list of active
			# formatting elements and the stack of open elements if the
			# adoption agency algorithm didn't already remove it (it might not
			# have if the element is not in table scope).
			#
			# note: found must start out null (not false), because the
			# existential check below is true for every non-null value
			found = null
			for el in afe
				if el.type is TYPE_AFE_MARKER
					break
				if el.name is 'a' and el.namespace is NS_HTML
					found = el
					break
			if found?
				parse_error()
				adoption_agency 'a'
				for el, i in afe
					if el is found
						afe.splice i, 1
						break
				for el, i in open_els
					if el is found
						open_els.splice i, 1
						break
			reconstruct_afe()
			el = insert_html_element t
			afe_push el
			return
		if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
			reconstruct_afe()
			el = insert_html_element t
			afe_push el
			return
		if t.type is TYPE_START_TAG and t.name is 'nobr'
			reconstruct_afe()
			if is_in_scope 'nobr', NS_HTML
				parse_error()
				adoption_agency 'nobr'
				reconstruct_afe()
			el = insert_html_element t
			afe_push el
			return
		if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
			adoption_agency t.name
			return
		if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
			reconstruct_afe()
			insert_html_element t
			afe_push_marker()
			flag_frameset_ok = false
			return
		if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
			unless is_in_scope t.name, NS_HTML
				parse_error()
				return
			generate_implied_end_tags()
			if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
				parse_error()
			loop
				el = open_els.shift()
				if el.name is t.name and el.namespace is NS_HTML
					break
			clear_afe_to_marker()
			return
		if t.type is TYPE_START_TAG and t.name is 'table'
			unless doc.flag('quirks mode') is QUIRKS_YES
				close_p_if_in_button_scope() # test
			insert_html_element t
			flag_frameset_ok = false
			ins_mode = ins_mode_in_table
			return
		if t.type is TYPE_END_TAG and t.name is 'br'
			parse_error()
			# W3C: t.type = TYPE_START_TAG
			t = new_open_tag 'br' # WHATWG
			# fall through
		if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
			reconstruct_afe()
			insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			flag_frameset_ok = false
			return
		if t.type is TYPE_START_TAG and t.name is 'input'
			reconstruct_afe()
			insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			unless is_input_hidden_tok t
				flag_frameset_ok = false
			return
		if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
			# WHATWG adds 'menuitem' for this block
			insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			return
		if t.type is TYPE_START_TAG and t.name is 'hr'
			close_p_if_in_button_scope()
			insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			flag_frameset_ok = false
			return
		if t.type is TYPE_START_TAG and t.name is 'image'
			parse_error()
			t.name = 'img'
			process_token t
			return
		if t.type is TYPE_START_TAG and t.name is 'isindex'
			parse_error()
			if template_tag_is_open() is false and form_element_pointer isnt null
				return
			t.acknowledge_self_closing()
			flag_frameset_ok = false
			close_p_if_in_button_scope()
			el = insert_html_element new_open_tag 'form'
			unless template_tag_is_open()
				form_element_pointer = el
			for a in t.attrs_a
				if a[0] is 'action'
					el.attrs['action'] = a[1]
					break
			insert_html_element new_open_tag 'hr'
			open_els.shift()
			reconstruct_afe()
			insert_html_element new_open_tag 'label'
			# note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
			input_el = new_open_tag 'input'
			prompt = null
			for a in t.attrs_a
				if a[0] is 'prompt'
					prompt = a[1]
				if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
					input_el.attrs_a.push [a[0], a[1]]
			input_el.attrs_a.push ['name', 'isindex']
			# fixfull this next bit is in english... internationalize?
			prompt ?= "This is a searchable index. Enter search keywords: "
			insert_character new_character_token prompt # fixfull split
			# TODO submit typo "balue" in spec
			insert_html_element input_el
			open_els.shift()
			# insert_character '' # you can put chars here if promt attr missing
			open_els.shift()
			insert_html_element new_open_tag 'hr'
			open_els.shift()
			open_els.shift()
			unless template_tag_is_open()
				form_element_pointer = null
			return
		if t.type is TYPE_START_TAG and t.name is 'textarea'
			insert_html_element t
			eat_next_token_if_newline()
			tok_state = tok_state_rcdata
			original_ins_mode = ins_mode
			flag_frameset_ok = false
			ins_mode = ins_mode_text
			return
		if t.type is TYPE_START_TAG and t.name is 'xmp'
			close_p_if_in_button_scope()
			reconstruct_afe()
			flag_frameset_ok = false
			parse_generic_raw_text t
			return
		if t.type is TYPE_START_TAG and t.name is 'iframe'
			flag_frameset_ok = false
			parse_generic_raw_text t
			return
		if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
			parse_generic_raw_text t
			return
		if t.type is TYPE_START_TAG and t.name is 'select'
			reconstruct_afe()
			insert_html_element t
			flag_frameset_ok = false
			if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
				ins_mode = ins_mode_in_select_in_table
			else
				ins_mode = ins_mode_in_select
			return
		if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
				open_els.shift()
			reconstruct_afe()
			insert_html_element t
			return
# this comment block implements the W3C spec
#		if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
#			if is_in_scope 'ruby', NS_HTML
#				generate_implied_end_tags()
#				unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
#					parse_error()
#			insert_html_element t
#			return
#		if t.type is TYPE_START_TAG and t.name is 'rt'
#			if is_in_scope 'ruby', NS_HTML
#				generate_implied_end_tags 'rtc' # arg is exception
#				unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
#					parse_error()
#			insert_html_element t
#			return
# below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
		if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
			if is_in_scope 'ruby', NS_HTML
				generate_implied_end_tags()
				unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
					parse_error()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
			if is_in_scope 'ruby', NS_HTML
				generate_implied_end_tags 'rtc'
				unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
					parse_error()
			insert_html_element t
			return
# end WHATWG chunk
		if t.type is TYPE_START_TAG and t.name is 'math'
			reconstruct_afe()
			adjust_mathml_attributes t
			adjust_foreign_attributes t
			insert_foreign_element t, NS_MATHML
			if t.flag 'self-closing'
				open_els.shift()
				t.acknowledge_self_closing()
			return
		if t.type is TYPE_START_TAG and t.name is 'svg'
			reconstruct_afe()
			adjust_svg_attributes t
			adjust_foreign_attributes t
			insert_foreign_element t, NS_SVG
			if t.flag 'self-closing'
				open_els.shift()
				t.acknowledge_self_closing()
			return
		if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
			parse_error()
			return
		if t.type is TYPE_START_TAG # any other start tag
			reconstruct_afe()
			insert_html_element t
			return
		if t.type is TYPE_END_TAG # any other end tag
			in_body_any_other_end_tag t.name
			return
		return

	# 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
	# Tree-construction handler for the "text" insertion mode.
	#
	# t: the token to process
	#
	# Text tokens are inserted; EOF and end tags pop the current node and
	# switch back to original_ins_mode (EOF is then reprocessed there).
	ins_mode_text = (t) ->
		switch t.type
			when TYPE_TEXT
				insert_character t
				return
			when TYPE_EOF
				parse_error()
				if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
					open_els[0].flag 'already started', true
				open_els.shift()
				ins_mode = original_ins_mode
				process_token t
				return
			when TYPE_END_TAG
				# </script> is handled exactly like every other end tag here
				# fixfull the spec seems to assume that I'm going to run the script
				# http://www.w3.org/TR/html5/syntax.html#scriptEndTag
				open_els.shift()
				ins_mode = original_ins_mode
				return
		console.log 'warning: end of ins_mode_text reached'

	# the functions below implement the tokenizer states described here:
	# http://www.w3.org/TR/html5/syntax.html#tokenization

	# 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
	# "Anything else" branch of the "in table" insertion mode, factored out
	# because several cases (and the "in table text" mode) need it.
	#
	# t: the token to process
	#
	# Reports a parse error, then hands the token to the "in body" rules with
	# flag_foster_parenting set for the duration of that call.
	ins_mode_in_table_else = (t) ->
		parse_error()
		flag_foster_parenting = true
		ins_mode_in_body t
		# always cleared afterwards (not restored to a previous value)
		flag_foster_parenting = false
		return
	# Tree-construction handler for the "in table" insertion mode.
	#
	# t: the token to process
	#
	# Dispatches first on t.type and then (for start/end tags) on t.name.
	# Anything not handled explicitly goes to ins_mode_in_table_else.
	ins_mode_in_table = (t) ->
		switch t.type
			when TYPE_TEXT
				# text directly inside a table-structure element is collected
				# by the "in table text" mode; t is reprocessed in that mode
				if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
					pending_table_character_tokens = []
					original_ins_mode = ins_mode
					ins_mode = ins_mode_in_table_text
					process_token t
				else
					ins_mode_in_table_else t
			when TYPE_COMMENT
				insert_comment t
			when TYPE_DOCTYPE
				parse_error()
			when TYPE_START_TAG
				switch t.name
					when 'caption'
						clear_stack_to_table_context()
						afe_push_marker()
						insert_html_element t
						ins_mode = ins_mode_in_caption
					when 'colgroup'
						clear_stack_to_table_context()
						insert_html_element t
						ins_mode = ins_mode_in_column_group
					when 'col'
						# insert an implied <colgroup>, then reprocess t
						clear_stack_to_table_context()
						insert_html_element new_open_tag 'colgroup'
						ins_mode = ins_mode_in_column_group
						process_token t
					when 'tbody', 'tfoot', 'thead'
						clear_stack_to_table_context()
						insert_html_element t
						ins_mode = ins_mode_in_table_body
					when 'td', 'th', 'tr'
						# insert an implied <tbody>, then reprocess t
						clear_stack_to_table_context()
						insert_html_element new_open_tag 'tbody'
						ins_mode = ins_mode_in_table_body
						process_token t
					when 'table'
						parse_error()
						# close the open table (if one is in table scope) and
						# reprocess t; otherwise the token is ignored
						if is_in_table_scope 'table', NS_HTML
							loop
								el = open_els.shift()
								if el.name is 'table' and el.namespace is NS_HTML
									break
							reset_ins_mode()
							process_token t
					when 'style', 'script', 'template'
						ins_mode_in_head t
					when 'input'
						unless is_input_hidden_tok t
							ins_mode_in_table_else t
						else
							parse_error()
							el = insert_html_element t
							open_els.shift()
							t.acknowledge_self_closing()
					when 'form'
						parse_error()
						# ignore the token if a form is already being tracked
						# or a template element is open
						if form_element_pointer?
							return
						if template_tag_is_open()
							return
						form_element_pointer = insert_html_element t
						open_els.shift()
					else
						ins_mode_in_table_else t
			when TYPE_END_TAG
				switch t.name
					when 'table'
						if is_in_table_scope 'table', NS_HTML
							loop
								el = open_els.shift()
								if el.name is 'table' and el.namespace is NS_HTML
									break
							reset_ins_mode()
						else
							parse_error()
					when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
						parse_error()
					when 'template'
						ins_mode_in_head t
					else
						ins_mode_in_table_else t
			when TYPE_EOF
				ins_mode_in_body t
			else
				ins_mode_in_table_else t


	# 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
	# Tree-construction handler for the "in table text" insertion mode.
	#
	# t: the token to process
	#
	# Text tokens are queued in pending_table_character_tokens (U+0000 is
	# dropped with a parse error). Any other token flushes the queue, restores
	# original_ins_mode and is then reprocessed.
	ins_mode_in_table_text = (t) ->
		if t.type is TYPE_TEXT
			if t.text is "\u0000"
				# from javascript?
				parse_error()
			else
				pending_table_character_tokens.push t
			return
		# Anything else
		all_space = pending_table_character_tokens.every (old) -> is_space_tok old
		# all-whitespace runs are inserted normally; otherwise every queued
		# token goes through the "in table" anything-else rules
		for old in pending_table_character_tokens
			if all_space
				insert_character old
			else
				ins_mode_in_table_else old
		pending_table_character_tokens = []
		ins_mode = original_ins_mode
		process_token t

	# 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
	# Tree-construction handler for the "in caption" insertion mode.
	#
	# t: the token to process
	#
	# </caption> closes the caption; table-structure start tags and </table>
	# close it implicitly and are reprocessed; a handful of stray end tags are
	# parse errors; everything else uses the "in body" rules.
	ins_mode_in_caption = (t) ->
		if t.type is TYPE_END_TAG and t.name is 'caption'
			if is_in_table_scope 'caption', NS_HTML
				generate_implied_end_tags()
				# current node must be an HTML caption element (check the
				# namespace too, like the other insertion modes do)
				unless open_els[0].name is 'caption' and open_els[0].namespace is NS_HTML
					parse_error()
				loop
					el = open_els.shift()
					if el.name is 'caption' and el.namespace is NS_HTML
						break
				clear_afe_to_marker()
				ins_mode = ins_mode_in_table
			else
				parse_error()
				# fragment case
			return
		if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or (t.type is TYPE_END_TAG and t.name is 'table')
			parse_error()
			if is_in_table_scope 'caption', NS_HTML
				loop
					el = open_els.shift()
					if el.name is 'caption' and el.namespace is NS_HTML
						break
				clear_afe_to_marker()
				ins_mode = ins_mode_in_table
				process_token t
			# else fragment case
			return
		if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
			parse_error()
			return
		# Anything else
		ins_mode_in_body t

	# 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
	# Tree-construction handler for the "in column group" insertion mode.
	#
	# t: the token to process
	#
	# Handles whitespace, comments, <col> and </colgroup> directly. Any other
	# token pops the current colgroup (if that is the current node) and is
	# reprocessed in the "in table" mode.
	ins_mode_in_column_group = (t) ->
		if is_space_tok t
			insert_character t
			return
		if t.type is TYPE_COMMENT
			insert_comment t
			return
		if t.type is TYPE_DOCTYPE
			parse_error()
			return
		if t.type is TYPE_START_TAG and t.name is 'html'
			ins_mode_in_body t
			return
		if t.type is TYPE_START_TAG and t.name is 'col'
			el = insert_html_element t
			open_els.shift()
			t.acknowledge_self_closing()
			return
		if t.type is TYPE_END_TAG and t.name is 'colgroup'
			# check the namespace of the current node (open_els[0]), not of
			# the open_els array itself
			if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
				open_els.shift()
				ins_mode = ins_mode_in_table
			else
				parse_error()
			return
		if t.type is TYPE_END_TAG and t.name is 'col'
			parse_error()
			return
		if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
			ins_mode_in_head t
			return
		if t.type is TYPE_EOF
			ins_mode_in_body t
			return
		# Anything else
		unless open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
			parse_error()
			return
		open_els.shift()
		ins_mode = ins_mode_in_table
		process_token t
		return

	# 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
	# Tree-construction handler for the "in table body" insertion mode.
	#
	# t: the token to process
	#
	# Opens rows (inserting an implied <tr> for stray cells), closes the
	# current table section, and defers everything else to "in table".
	ins_mode_in_table_body = (t) ->
		if t.type is TYPE_START_TAG
			switch t.name
				when 'tr'
					clear_stack_to_table_body_context()
					insert_html_element t
					ins_mode = ins_mode_in_row
					return
				when 'th', 'td'
					# cell without a row: insert an implied <tr>, reprocess
					parse_error()
					clear_stack_to_table_body_context()
					insert_html_element new_open_tag 'tr'
					ins_mode = ins_mode_in_row
					process_token t
					return
		if t.type is TYPE_END_TAG
			switch t.name
				when 'tbody', 'tfoot', 'thead'
					unless is_in_table_scope t.name, NS_HTML
						parse_error()
						return
					clear_stack_to_table_body_context()
					open_els.shift()
					ins_mode = ins_mode_in_table
					return
				when 'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'
					parse_error()
					return
		if (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead']) or (t.type is TYPE_END_TAG and t.name is 'table')
			# is there a tbody, tfoot or thead in table scope?
			has = false
			for el in open_els
				if el.namespace is NS_HTML and el.name in ['tbody', 'tfoot', 'thead']
					has = true
					break
				break if table_scopers[el.name] is el.namespace
			unless has
				parse_error()
				return
			# close the current section and let "in table" handle the token
			clear_stack_to_table_body_context()
			open_els.shift()
			ins_mode = ins_mode_in_table
			process_token t
			return
		# Anything else
		ins_mode_in_table t

	# 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
	# Tree-construction handler for the "in row" insertion mode.
	#
	# t: the token to process
	#
	# Opens cells, closes the current row (explicitly via </tr>, or implicitly
	# for table-structure tags, which are then reprocessed), and defers
	# everything else to "in table".
	ins_mode_in_row = (t) ->
		switch t.type
			when TYPE_START_TAG
				switch t.name
					when 'th', 'td'
						clear_stack_to_table_row_context()
						insert_html_element t
						ins_mode = ins_mode_in_cell
						afe_push_marker()
						return
					when 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'
						unless is_in_table_scope 'tr', NS_HTML
							parse_error()
							return
						# implicitly close the row, then reprocess
						clear_stack_to_table_row_context()
						open_els.shift()
						ins_mode = ins_mode_in_table_body
						process_token t
						return
			when TYPE_END_TAG
				switch t.name
					when 'tr'
						unless is_in_table_scope 'tr', NS_HTML
							parse_error()
							return
						clear_stack_to_table_row_context()
						open_els.shift()
						ins_mode = ins_mode_in_table_body
						return
					when 'table'
						unless is_in_table_scope 'tr', NS_HTML
							parse_error()
							return
						# implicitly close the row, then reprocess
						clear_stack_to_table_row_context()
						open_els.shift()
						ins_mode = ins_mode_in_table_body
						process_token t
						return
					when 'tbody', 'tfoot', 'thead'
						unless is_in_table_scope t.name, NS_HTML
							parse_error()
							return
						# silently ignored when there is no row in table scope
						return unless is_in_table_scope 'tr', NS_HTML
						clear_stack_to_table_row_context()
						open_els.shift()
						ins_mode = ins_mode_in_table_body
						process_token t
						return
					when 'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'
						parse_error()
						return
		# Anything else
		ins_mode_in_table t

	# http://www.w3.org/TR/html5/syntax.html#close-the-cell
	# Implements the spec's "close the cell" steps: generate implied end tags,
	# report a parse error unless the current node is an HTML td or th, pop
	# open_els up to and including the nearest HTML td/th, clear the afe list
	# up to the last marker, and switch to the "in row" insertion mode.
	close_the_cell = ->
		generate_implied_end_tags()
		# compare the current node's *name* against both cell tag names
		unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
			parse_error()
		loop
			el = open_els.shift()
			if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
				break
		clear_afe_to_marker()
		ins_mode = ins_mode_in_row

	# 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
	# Tree-construction handler for the "in cell" insertion mode.
	#
	# t: the token to process
	#
	# Closes the cell on </td> / </th>, closes it implicitly (and reprocesses
	# the token) for table-structure tags, and otherwise uses "in body".
	ins_mode_in_cell = (t) ->
		switch t.type
			when TYPE_END_TAG
				switch t.name
					when 'td', 'th'
						unless is_in_table_scope t.name, NS_HTML
							parse_error()
							return
						generate_implied_end_tags()
						if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
							parse_error()
						# pop up to and including the matching cell
						loop
							el = open_els.shift()
							break if el.name is t.name and el.namespace is NS_HTML
						clear_afe_to_marker()
						ins_mode = ins_mode_in_row
						return
					when 'body', 'caption', 'col', 'colgroup', 'html'
						parse_error()
						return
					when 'table', 'tbody', 'tfoot', 'thead', 'tr'
						unless is_in_table_scope t.name, NS_HTML
							parse_error()
							return
						close_the_cell()
						process_token t
						return
			when TYPE_START_TAG
				if t.name in ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
					# is there a td or th in table scope?
					has = false
					for el in open_els
						if el.namespace is NS_HTML and el.name in ['td', 'th']
							has = true
							break
						break if table_scopers[el.name] is el.namespace
					unless has
						parse_error()
						return
					close_the_cell()
					process_token t
					return
		# Anything Else
		ins_mode_in_body t

	# 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
	# Tree-construction handler for the "in select" insertion mode.
	#
	# t: the token to process
	#
	# Only option/optgroup/select structure, text, comments and script/template
	# are accepted here; form controls close the select and are reprocessed;
	# anything else is a parse error and is ignored.
	ins_mode_in_select = (t) ->
		if t.type is TYPE_TEXT and t.text is "\u0000"
			parse_error()
			return
		if t.type is TYPE_TEXT
			insert_character t
			return
		if t.type is TYPE_COMMENT
			insert_comment t
			return
		if t.type is TYPE_DOCTYPE
			parse_error()
			return
		if t.type is TYPE_START_TAG and t.name is 'html'
			ins_mode_in_body t
			return
		if t.type is TYPE_START_TAG and t.name is 'option'
			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
				open_els.shift()
			insert_html_element t
			return
		if t.type is TYPE_START_TAG and t.name is 'optgroup'
			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
				open_els.shift()
			if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
				open_els.shift()
			insert_html_element t
			return
		if t.type is TYPE_END_TAG and t.name is 'optgroup'
			# an option directly inside an optgroup is closed implicitly; the
			# namespace test must look at the same node (open_els[1]) as the
			# name test
			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
				if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
					open_els.shift()
			if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
				open_els.shift()
			else
				parse_error()
			return
		if t.type is TYPE_END_TAG and t.name is 'option'
			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
				open_els.shift()
			else
				parse_error()
			return
		if t.type is TYPE_END_TAG and t.name is 'select'
			if is_in_select_scope 'select', NS_HTML
				loop
					el = open_els.shift()
					if el.name is 'select' and el.namespace is NS_HTML
						break
				reset_ins_mode()
			else
				parse_error()
			return
		if t.type is TYPE_START_TAG and t.name is 'select'
			parse_error()
			# WHATWG: ignore the token if there is no select in select scope
			# (fragment case). The W3C text says to treat this like </select>
			# without mentioning the scope check, but popping without it
			# would run off the end of open_els.
			unless is_in_select_scope 'select', NS_HTML
				return
			loop
				el = open_els.shift()
				if el.name is 'select' and el.namespace is NS_HTML
					break
			reset_ins_mode()
			return
		if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
			parse_error()
			unless is_in_select_scope 'select', NS_HTML
				return
			loop
				el = open_els.shift()
				if el.name is 'select' and el.namespace is NS_HTML
					break
			reset_ins_mode()
			process_token t
			return
		# <script>, <template> and </template> all use the "in head" rules
		if (t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')) or (t.type is TYPE_END_TAG and t.name is 'template')
			ins_mode_in_head t
			return
		if t.type is TYPE_EOF
			ins_mode_in_body t
			return
		# Anything else
		parse_error()
		return

	# 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
	# Handles one token while a select is open inside a table. Table-related
	# start and end tags are parse errors: the stack of open elements is popped
	# through the select, the insertion mode is reset and the token is
	# reprocessed (end tags only when the named element is in table scope,
	# otherwise they are dropped). Everything else goes to ins_mode_in_select.
	ins_mode_in_select_in_table = (t) ->
		if t.type is TYPE_START_TAG or t.type is TYPE_END_TAG
			if t.name in ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
				parse_error()
				# an end tag with no matching element in table scope is ignored
				if t.type is TYPE_END_TAG and not is_in_table_scope(t.name, NS_HTML)
					return
				# pop up to and including the html select element
				loop
					el = open_els.shift()
					break if el.name is 'select' and el.namespace is NS_HTML
				reset_ins_mode()
				process_token t
				return
		# Anything else
		ins_mode_in_select t
		return

	# 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
	# Handles one token in the "in template" insertion mode.
	#
	# Text, comment and doctype tokens go to ins_mode_in_body; head-type start
	# tags and the template end tag go to ins_mode_in_head. Any other start tag
	# swaps the front entry of template_ins_modes for a mode chosen from the
	# tag name, makes that the current insertion mode and reprocesses the
	# token. Other end tags are parse errors and are dropped.
	ins_mode_in_template = (t) ->
		switch t.type
			when TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE
				ins_mode_in_body t
				return
			when TYPE_START_TAG
				switch t.name
					when 'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'
						ins_mode_in_head t
						return
					when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead'
						tpl_next_mode = ins_mode_in_table
					when 'col'
						tpl_next_mode = ins_mode_in_column_group
					when 'tr'
						tpl_next_mode = ins_mode_in_table_body
					when 'td', 'th'
						tpl_next_mode = ins_mode_in_row
					else
						tpl_next_mode = ins_mode_in_body
				# replace the current template insertion mode, then reprocess
				template_ins_modes.shift()
				template_ins_modes.unshift tpl_next_mode
				ins_mode = tpl_next_mode
				process_token t
				return
			when TYPE_END_TAG
				if t.name is 'template'
					ins_mode_in_head t
				else
					parse_error()
				return
			when TYPE_EOF
				unless template_tag_is_open()
					stop_parsing()
					return
				parse_error()
				# pop up to and including the html template element
				loop
					el = open_els.shift()
					break if el.name is 'template' and el.namespace is NS_HTML
				clear_afe_to_marker()
				template_ins_modes.shift()
				reset_ins_mode()
				return process_token t
		return

	# 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
	# Handles one token in the "after body" insertion mode. Whitespace and an
	# html start tag are processed by ins_mode_in_body, comments are appended
	# to the last entry of open_els, and an html end tag switches to
	# ins_mode_after_after_body (unless fragment parsing). Any other token is
	# a parse error that switches back to ins_mode_in_body and is reprocessed.
	ins_mode_after_body = (t) ->
		if is_space_tok t
			ins_mode_in_body t
			return
		switch t.type
			when TYPE_COMMENT
				# the last entry of open_els is the "first"/topmost element
				first = open_els[open_els.length - 1]
				insert_comment t, [first, first.children.length]
				return
			when TYPE_DOCTYPE
				parse_error()
				return
			when TYPE_START_TAG
				if t.name is 'html'
					ins_mode_in_body t
					return
			when TYPE_END_TAG
				if t.name is 'html'
					if flag_fragment_parsing
						parse_error()
					else
						ins_mode = ins_mode_after_after_body
					return
			when TYPE_EOF
				stop_parsing()
				return
		# Anything else
		parse_error()
		ins_mode = ins_mode_in_body
		process_token t

	# 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
	# Handles one token in the "in frameset" insertion mode. Whitespace and
	# comments are inserted; frameset/frame start tags insert elements (frame
	# is popped again immediately); a frameset end tag pops the current node
	# and may switch to ins_mode_after_frameset. Unhandled tokens are parse
	# errors and are dropped.
	ins_mode_in_frameset = (t) ->
		if is_space_tok t
			insert_character t
			return
		switch t.type
			when TYPE_COMMENT
				insert_comment t
				return
			when TYPE_DOCTYPE
				parse_error()
				return
			when TYPE_START_TAG
				switch t.name
					when 'html'
						ins_mode_in_body t
						return
					when 'frameset'
						insert_html_element t
						return
					when 'frame'
						insert_html_element t
						open_els.shift()
						t.acknowledge_self_closing()
						return
					when 'noframes'
						ins_mode_in_head t
						return
			when TYPE_END_TAG
				if t.name is 'frameset'
					if open_els.length is 1
						parse_error()
						return # fragment case
					open_els.shift()
					if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
						ins_mode = ins_mode_after_frameset
					return
			when TYPE_EOF
				parse_error() if open_els.length isnt 1
				stop_parsing()
				return
		# Anything else
		parse_error()
		return

	# 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
	# Handles one token in the "after frameset" insertion mode. Whitespace and
	# comments are inserted, an html end tag switches to
	# ins_mode_after_after_frameset, EOF stops parsing, and unhandled tokens
	# are parse errors that are dropped.
	ins_mode_after_frameset = (t) ->
		if is_space_tok t
			insert_character t
			return
		switch t.type
			when TYPE_COMMENT
				insert_comment t
				return
			when TYPE_DOCTYPE
				parse_error()
				return
			when TYPE_START_TAG
				if t.name is 'html'
					ins_mode_in_body t
					return
				if t.name is 'noframes'
					ins_mode_in_head t
					return
			when TYPE_END_TAG
				if t.name is 'html'
					ins_mode = ins_mode_after_after_frameset
					return
			when TYPE_EOF
				stop_parsing()
				return
		# Anything else
		parse_error()
		return

	# 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
	# Handles one token in the "after after body" insertion mode. Comments are
	# appended to doc; doctype, whitespace and html start tags are processed
	# by ins_mode_in_body; EOF stops parsing. Any other token is a parse error
	# that switches back to ins_mode_in_body and is reprocessed.
	ins_mode_after_after_body = (t) ->
		switch
			when t.type is TYPE_COMMENT
				insert_comment t, [doc, doc.children.length]
			when t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
				ins_mode_in_body t
			when t.type is TYPE_EOF
				stop_parsing()
			else
				# Anything else
				parse_error()
				ins_mode = ins_mode_in_body
				process_token t
		return

	# 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
	# Handles one token in the "after after frameset" insertion mode. Comments
	# are appended to doc; doctype, whitespace and html start tags are
	# processed by ins_mode_in_body; a noframes start tag by ins_mode_in_head;
	# EOF stops parsing. Anything else is a parse error and is dropped.
	ins_mode_after_after_frameset = (t) ->
		switch
			when t.type is TYPE_COMMENT
				insert_comment t, [doc, doc.children.length]
			when t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
				ins_mode_in_body t
			when t.type is TYPE_EOF
				stop_parsing()
			when t.type is TYPE_START_TAG and t.name is 'noframes'
				ins_mode_in_head t
			else
				# Anything else
				parse_error()
		return

	# 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
	# Returns true when any entry of t.attrs_a (pairs of [name, value]) is
	# named 'color', 'face' or 'size'; false otherwise.
	has_color_face_or_size = (t) ->
		return t.attrs_a.some (a) -> a[0] in ['color', 'face', 'size']
	# End-of-script handling for foreign content: currently this only pops the
	# current node off the stack of open elements.
	in_foreign_content_end_script = ->
		# fixfull
		open_els.shift()
		return
	# Inserts start tag t as a foreign element in the namespace of the
	# adjusted current node, after applying the MathML/SVG name and attribute
	# adjustments for that namespace. A self-closing tag is acknowledged and
	# its element popped again right away.
	in_foreign_content_other_start = (t) ->
		acn = adjusted_current_node()
		switch acn.namespace
			when NS_MATHML
				adjust_mathml_attributes t
			when NS_SVG
				# fix the case of camelCase svg tag names first
				t.name = svg_name_fixes[t.name] if svg_name_fixes[t.name]?
				adjust_svg_attributes t
		adjust_foreign_attributes t
		insert_foreign_element t, acn.namespace
		return unless t.flag 'self-closing'
		if t.name is 'script'
			t.acknowledge_self_closing()
			in_foreign_content_end_script()
			# fixfull
			return
		open_els.shift()
		t.acknowledge_self_closing()
		return
	# Processes one token while the parser is in foreign (MathML/SVG) content.
	#
	# Character, comment and doctype tokens are handled directly. A start tag
	# from the html-only list below (or font with color/face/size) is a parse
	# error that pops the stack back to an integration point or html element
	# and reprocesses the token; other start tags are inserted as foreign
	# elements. End tags walk the stack looking for a case-insensitive name
	# match, falling back to the current insertion mode once an html element
	# is reached.
	in_foreign_content = (t) ->
		if t.type is TYPE_TEXT and t.text is "\u0000"
			parse_error()
			insert_character new_character_token "\ufffd"
			return
		if is_space_tok t
			insert_character t
			return
		switch t.type
			when TYPE_TEXT
				flag_frameset_ok = false
				insert_character t
				return
			when TYPE_COMMENT
				insert_comment t
				return
			when TYPE_DOCTYPE
				parse_error()
				return
			when TYPE_START_TAG
				if t.name in ['b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd', 'div', 'dl', 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'main', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong', 'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'] or (t.name is 'font' and has_color_face_or_size(t))
					parse_error()
					if flag_fragment_parsing
						in_foreign_content_other_start t
						return
					# pop at least one element, then keep popping until the
					# current node can hold html content
					loop # is this safe?
						open_els.shift()
						break if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
					process_token t
					return
				in_foreign_content_other_start t
				return
			when TYPE_END_TAG
				if t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
					in_foreign_content_end_script()
					return
				i = 0
				node = open_els[i]
				parse_error() if node.name.toLowerCase() isnt t.name
				loop
					# reached the last entry of open_els without a match
					return if node is open_els[open_els.length - 1]
					if node.name.toLowerCase() is t.name
						# pop up to and including the matching node
						loop
							el = open_els.shift()
							return if el is node
					i += 1
					node = open_els[i]
					break if node.namespace is NS_HTML
				return ins_mode t # explicitly call HTML insertion mode
		return


	# 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
	# Tokenizer "data" state: consumes one character from txt. Returns a text
	# node (a decoded character reference, U+FFFD for NUL, or the character
	# itself), an EOF token at end of input, or null after switching to the
	# tag open state on '<'.
	tok_state_data = ->
		c = txt.charAt(cur++)
		if c is '&'
			return new_text_node parse_character_reference()
		if c is '<'
			tok_state = tok_state_tag_open
			return null
		if c is "\u0000"
			parse_error()
			return new_text_node "\ufffd"
		if c is '' # EOF
			return new_eof_token()
		return new_text_node c

	# 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
	# not needed: tok_state_character_reference_in_data = ->
	# just call parse_character_reference()

	# 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
	# Tokenizer "RCDATA" state: consumes one character. Character references
	# are decoded, '<' switches to the RCDATA less-than sign state (returning
	# null), NUL becomes U+FFFD with a parse error, and end of input yields an
	# EOF token.
	tok_state_rcdata = ->
		c = txt.charAt(cur++)
		if c is '&'
			return new_text_node parse_character_reference()
		if c is '<'
			tok_state = tok_state_rcdata_less_than_sign
			return null
		if c is "\u0000"
			parse_error()
			return new_character_token "\ufffd"
		if c is '' # EOF
			return new_eof_token()
		return new_character_token c

	# 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
	# not needed: tok_state_character_reference_in_rcdata = ->
	# just call parse_character_reference()

	# 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
	# Tokenizer "RAWTEXT" state: consumes one character. '<' switches to the
	# RAWTEXT less-than sign state (returning null), NUL becomes U+FFFD with a
	# parse error, end of input yields an EOF token, and anything else is
	# returned as a character token.
	tok_state_rawtext = ->
		c = txt.charAt(cur++)
		if c is '<'
			tok_state = tok_state_rawtext_less_than_sign
			return null
		if c is "\u0000"
			parse_error()
			return new_character_token "\ufffd"
		if c is '' # EOF
			return new_eof_token()
		return new_character_token c

	# 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
	# Tokenizer "script data" state: consumes one character. '<' switches to
	# the script data less-than sign state (returning null), NUL becomes
	# U+FFFD with a parse error, end of input yields an EOF token, and
	# anything else is returned as a character token.
	tok_state_script_data = ->
		c = txt.charAt(cur++)
		if c is '<'
			tok_state = tok_state_script_data_less_than_sign
			return null
		if c is "\u0000"
			parse_error()
			return new_character_token "\ufffd"
		if c is '' # EOF
			return new_eof_token()
		return new_character_token c

	# 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
	# Tokenizer "PLAINTEXT" state: consumes one character and returns it as a
	# character token (U+FFFD plus a parse error for NUL), or an EOF token at
	# end of input. This state never switches to another state.
	tok_state_plaintext = ->
		c = txt.charAt(cur++)
		if c is "\u0000"
			parse_error()
			return new_character_token "\ufffd"
		if c is '' # EOF
			return new_eof_token()
		return new_character_token c


	# 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
	# Tokenizer "tag open" state (the character after '<'). Starts a markup
	# declaration, an end tag, a new start tag (name lowercased) or a bogus
	# comment. Any other character is a parse error: it is left unconsumed
	# and a literal '<' text node is returned.
	tok_state_tag_open = ->
		c = txt.charAt(cur++)
		if c is '!'
			tok_state = tok_state_markup_declaration_open
		else if c is '/'
			tok_state = tok_state_end_tag_open
		else if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_open_tag c.toLowerCase()
			tok_state = tok_state_tag_name
		else if c is '?'
			parse_error()
			tok_cur_tag = new_comment_token '?' # FIXME right?
			tok_state = tok_state_bogus_comment
		else
			# Anything else
			parse_error()
			tok_state = tok_state_data
			cur -= 1 # we didn't parse/handle the char after <
			return new_text_node '<'
		return

	# 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
	# Tokenizer "end tag open" state (the character after '</'). A letter
	# starts a new end tag token (name lowercased); '>' is a parse error that
	# returns to the data state; end of input returns a literal '</' text
	# node; anything else starts a bogus comment.
	tok_state_end_tag_open = ->
		c = txt.charAt(cur++)
		if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_end_tag c.toLowerCase()
			tok_state = tok_state_tag_name
			return
		switch c
			when '>'
				parse_error()
				tok_state = tok_state_data
				return
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				return new_text_node '</'
		# Anything else
		parse_error()
		tok_cur_tag = new_comment_token c
		tok_state = tok_state_bogus_comment
		return null

	# 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
	# Tokenizer "tag name" state: consumes one character of the tag name in
	# tok_cur_tag. Returns the finished tag token on '>' (clearing
	# tok_cur_tag), otherwise null after switching state or appending the
	# (lowercased) character to the name.
	tok_state_tag_name = ->
		c = txt.charAt(cur++)
		if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
			tok_state = tok_state_before_attribute_name
		else if c is '/'
			tok_state = tok_state_self_closing_start_tag
		else if c is '>'
			tok_state = tok_state_data
			tmp = tok_cur_tag
			tok_cur_tag = null
			return tmp
		else if c is "\u0000"
			parse_error()
			tok_cur_tag.name += "\ufffd"
		else if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
		else if is_uc_alpha(c)
			tok_cur_tag.name += c.toLowerCase()
		else
			tok_cur_tag.name += c
		return null

	# 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
	# Tokenizer "RCDATA less-than sign" state. '/' clears temporary_buffer and
	# moves to the RCDATA end tag open state (returning null); any other
	# character is left unconsumed and a '<' character token is returned.
	tok_state_rcdata_less_than_sign = ->
		c = txt.charAt(cur++)
		unless c is '/'
			# Anything else
			tok_state = tok_state_rcdata
			cur -= 1 # reconsume the input character
			return new_character_token '<'
		temporary_buffer = ''
		tok_state = tok_state_rcdata_end_tag_open
		return null

	# 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
	# Tokenizer "RCDATA end tag open" state. A letter starts a new end tag
	# token (name lowercased) and is also recorded, as typed, in
	# temporary_buffer; any other character is left unconsumed and a '</'
	# character token is returned.
	tok_state_rcdata_end_tag_open = ->
		c = txt.charAt(cur++)
		if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_end_tag c.toLowerCase()
			temporary_buffer += c
			tok_state = tok_state_rcdata_end_tag_name
			return null
		# Anything else
		tok_state = tok_state_rcdata
		cur -= 1 # reconsume the input character
		return new_character_token "</" # fixfull separate these

	# http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
	# Returns true when t is an end tag token whose name equals the name of
	# the current node (open_els[0]).
	#
	# The spec compares against "the tag name of the last start tag to have
	# been emitted from this tokenizer" instead; since this is only called
	# from the various "raw" states, open_els[0].name is hopefully an ok
	# substitute. TODO: verify this after the script data states are
	# implemented
	is_appropriate_end_tag = (t) ->
		debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
		return false unless t.type is TYPE_END_TAG
		return t.name is open_els[0].name

	# 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
	# Tokenizer "RCDATA end tag name" state. Whitespace, '/' and '>' finish
	# the end tag only when it is an appropriate end tag; letters extend the
	# tag name (lowercased) and temporary_buffer (as typed). In every other
	# case the character is left unconsumed and the text consumed so far is
	# returned as a character token.
	tok_state_rcdata_end_tag_name = ->
		c = txt.charAt(cur++)
		switch c
			when "\t", "\n", "\u000c", ' '
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_before_attribute_name
					return
			when '/'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
					return
			when '>'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_data
					return tok_cur_tag
		# not an appropriate end tag: carry on with the checks below
		if is_uc_alpha(c) or is_lc_alpha(c)
			tok_cur_tag.name += c.toLowerCase()
			temporary_buffer += c
			return null
		# Anything else
		tok_state = tok_state_rcdata
		cur -= 1 # reconsume the input character
		return new_character_token '</' + temporary_buffer # fixfull separate these

	# 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
	# Tokenizer "RAWTEXT less-than sign" state. '/' clears temporary_buffer
	# and moves to the RAWTEXT end tag open state (returning null); any other
	# character is left unconsumed and a '<' character token is returned.
	tok_state_rawtext_less_than_sign = ->
		c = txt.charAt(cur++)
		unless c is '/'
			# Anything else
			tok_state = tok_state_rawtext
			cur -= 1 # reconsume the input character
			return new_character_token '<'
		temporary_buffer = ''
		tok_state = tok_state_rawtext_end_tag_open
		return null

	# 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
	# Tokenizer "RAWTEXT end tag open" state. A letter starts a new end tag
	# token (name lowercased) and is also recorded, as typed, in
	# temporary_buffer; any other character is left unconsumed and a '</'
	# character token is returned.
	tok_state_rawtext_end_tag_open = ->
		c = txt.charAt(cur++)
		if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_end_tag c.toLowerCase()
			temporary_buffer += c
			tok_state = tok_state_rawtext_end_tag_name
			return null
		# Anything else
		tok_state = tok_state_rawtext
		cur -= 1 # reconsume the input character
		return new_character_token "</" # fixfull separate these

	# 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
	# Tokenizer "RAWTEXT end tag name" state. Whitespace, '/' and '>' finish
	# the end tag only when it is an appropriate end tag; letters extend the
	# tag name (lowercased) and temporary_buffer (as typed). In every other
	# case the character is left unconsumed and the text consumed so far is
	# returned as a character token.
	tok_state_rawtext_end_tag_name = ->
		c = txt.charAt(cur++)
		switch c
			when "\t", "\n", "\u000c", ' '
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_before_attribute_name
					return
			when '/'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_self_closing_start_tag
					return
			when '>'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_data
					return tok_cur_tag
		# not an appropriate end tag: carry on with the checks below
		if is_uc_alpha(c) or is_lc_alpha(c)
			tok_cur_tag.name += c.toLowerCase()
			temporary_buffer += c
			return null
		# Anything else
		tok_state = tok_state_rawtext
		cur -= 1 # reconsume the input character
		return new_character_token '</' + temporary_buffer # fixfull separate these

	# 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
	# Tokenizer "script data less-than sign" state. '/' begins a possible end
	# tag, '!' begins a possible escape sequence (returning '<!'), and any
	# other character is left unconsumed while '<' is returned.
	tok_state_script_data_less_than_sign = ->
		switch c = txt.charAt(cur++)
			when '/'
				temporary_buffer = ''
				tok_state = tok_state_script_data_end_tag_open
				return
			when '!'
				tok_state = tok_state_script_data_escape_start
				return new_character_token '<!' # fixfull split
		# Anything else
		tok_state = tok_state_script_data
		cur -= 1 # Reconsume
		return new_character_token '<'

	# 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
	# Tokenizer "script data end tag open" state. A letter starts a new end
	# tag token (name lowercased) and is also recorded, as typed, in
	# temporary_buffer; any other character is left unconsumed and a '</'
	# character token is returned.
	tok_state_script_data_end_tag_open = ->
		c = txt.charAt(cur++)
		if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_end_tag c.toLowerCase()
			temporary_buffer += c
			tok_state = tok_state_script_data_end_tag_name
			return
		# Anything else
		tok_state = tok_state_script_data
		cur -= 1 # Reconsume
		return new_character_token '</'

	# 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
	# Tokenizer "script data end tag name" state. Whitespace, '/' and '>'
	# finish the end tag only when it is an appropriate end tag; letters
	# extend the tag name (lowercased) and temporary_buffer (as typed). In
	# every other case the character is left unconsumed and the text consumed
	# so far is returned as a character token.
	tok_state_script_data_end_tag_name = ->
		c = txt.charAt(cur++)
		switch c
			when "\t", "\n", "\u000c", ' '
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_before_attribute_name
					return
			when '/'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_self_closing_start_tag
					return
			when '>'
				if is_appropriate_end_tag tok_cur_tag
					tok_state = tok_state_data
					return tok_cur_tag
		# not an appropriate end tag: carry on with the checks below
		if is_uc_alpha(c) or is_lc_alpha(c)
			tok_cur_tag.name += c.toLowerCase()
			temporary_buffer += c
			return
		# Anything else
		tok_state = tok_state_script_data
		cur -= 1 # Reconsume
		return new_character_token "</#{temporary_buffer}" # fixfull split

	# 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
	# Tokenizer "script data escape start" state (after '<!'). A '-' moves on
	# to the escape start dash state and is returned as a character token;
	# anything else is left unconsumed for the script data state.
	tok_state_script_data_escape_start = ->
		c = txt.charAt(cur++)
		unless c is '-'
			# Anything else
			tok_state = tok_state_script_data
			cur -= 1 # Reconsume
			return
		tok_state = tok_state_script_data_escape_start_dash
		return new_character_token '-'

	# 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
	# Tokenizer "script data escape start dash" state (after '<!-'). A second
	# '-' enters the escaped dash dash state and is returned as a character
	# token; anything else is left unconsumed for the script data state.
	tok_state_script_data_escape_start_dash = ->
		c = txt.charAt(cur++)
		unless c is '-'
			# Anything else
			tok_state = tok_state_script_data
			cur -= 1 # Reconsume
			return
		tok_state = tok_state_script_data_escaped_dash_dash
		return new_character_token '-'

	# 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
	# Tokenizer "script data escaped" state: consumes one character. '-' and
	# '<' switch to their follow-up states, NUL becomes U+FFFD with a parse
	# error, end of input is a parse error that falls back to the data state,
	# and anything else is returned as a character token.
	tok_state_script_data_escaped = ->
		switch c = txt.charAt(cur++)
			when '-'
				tok_state = tok_state_script_data_escaped_dash
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_escaped_less_than_sign
				return
			when "\u0000"
				parse_error()
				return new_character_token "\ufffd"
			when '' # EOF
				tok_state = tok_state_data
				parse_error()
				cur -= 1 # Reconsume
				return
		# Anything else
		return new_character_token c

	# 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
	# Tokenizer "script data escaped dash" state (one '-' seen). A second '-'
	# moves to the dash dash state, '<' to the escaped less-than sign state,
	# end of input falls back to the data state with a parse error, and
	# everything else returns to the script data escaped state.
	tok_state_script_data_escaped_dash = ->
		switch c = txt.charAt(cur++)
			when '-'
				tok_state = tok_state_script_data_escaped_dash_dash
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_escaped_less_than_sign
				return
			when "\u0000"
				parse_error()
				tok_state = tok_state_script_data_escaped
				return new_character_token "\ufffd"
			when '' # EOF
				tok_state = tok_state_data
				parse_error()
				cur -= 1 # Reconsume
				return
		# Anything else
		tok_state = tok_state_script_data_escaped
		return new_character_token c

	# 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
	# Tokenizer "script data escaped dash dash" state (at least '--' seen).
	# Further dashes stay in this state, '>' ends the escape and returns to
	# the script data state, '<' moves to the escaped less-than sign state,
	# end of input falls back to the data state with a parse error, and
	# everything else returns to the script data escaped state.
	tok_state_script_data_escaped_dash_dash = ->
		switch c = txt.charAt(cur++)
			when '-'
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_escaped_less_than_sign
				return
			when '>'
				tok_state = tok_state_script_data
				return new_character_token '>'
			when "\u0000"
				parse_error()
				tok_state = tok_state_script_data_escaped
				return new_character_token "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return
		# Anything else
		tok_state = tok_state_script_data_escaped
		return new_character_token c

	# 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
	# Tokenizer "script data escaped less-than sign" state. '/' begins a
	# possible end tag; a letter begins a possible nested "<script" (its
	# lowercase form seeds temporary_buffer while '<' plus the letter as
	# typed is returned); anything else is left unconsumed and '<' is
	# returned.
	tok_state_script_data_escaped_less_than_sign = ->
		c = txt.charAt(cur++)
		if c is '/'
			temporary_buffer = ''
			tok_state = tok_state_script_data_escaped_end_tag_open
			return
		if is_uc_alpha(c) or is_lc_alpha(c)
			# the buffer gets the lowercase letter (yes, really); lowercasing
			# leaves an already-lowercase letter unchanged
			temporary_buffer = c.toLowerCase()
			tok_state = tok_state_script_data_double_escape_start
			return new_character_token "<#{c}" # fixfull split
		# Anything else
		tok_state = tok_state_script_data_escaped
		cur -= 1 # Reconsume
		return new_character_token '<'

	# 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
	# Tokenizer "script data escaped end tag open" state. A letter starts a
	# new end tag token (name lowercased) and is also recorded, as typed, in
	# temporary_buffer; any other character is left unconsumed and a '</'
	# character token is returned.
	tok_state_script_data_escaped_end_tag_open = ->
		c = txt.charAt(cur++)
		if is_uc_alpha(c) or is_lc_alpha(c)
			# lowercasing leaves an already-lowercase letter unchanged
			tok_cur_tag = new_end_tag c.toLowerCase()
			temporary_buffer += c
			tok_state = tok_state_script_data_escaped_end_tag_name
			return
		# Anything else
		tok_state = tok_state_script_data_escaped
		cur -= 1 # Reconsume
		return new_character_token '</' # fixfull split

	# 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
	# Tokenizer "script data escaped end tag name" state. Whitespace, '/' and
	# '>' finish the end tag only when it is an appropriate end tag; letters
	# extend the tag name and temporary_buffer. In every other case the
	# character is left unconsumed and the text consumed so far ('</' plus
	# temporary_buffer) is returned as a character token.
	#
	# temporary_buffer must hold the characters exactly as they appeared in
	# the input, because it is re-emitted verbatim as text when this turns
	# out not to be an end tag. Only the tag name is lowercased.
	tok_state_script_data_escaped_end_tag_name = ->
		c = txt.charAt(cur++)
		if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
			if is_appropriate_end_tag tok_cur_tag
				tok_state = tok_state_before_attribute_name
				return
			# fall through
		if c is '/'
			if is_appropriate_end_tag tok_cur_tag
				tok_state = tok_state_self_closing_start_tag
				return
			# fall through
		if c is '>'
			if is_appropriate_end_tag tok_cur_tag
				tok_state = tok_state_data
				return tok_cur_tag
			# fall through
		if is_uc_alpha(c)
			tok_cur_tag.name += c.toLowerCase()
			temporary_buffer += c # as typed, not lowercased
			return
		if is_lc_alpha(c)
			tok_cur_tag.name += c
			temporary_buffer += c
			return
		# Anything else
		tok_state = tok_state_script_data_escaped
		cur -= 1 # Reconsume
		return new_character_token "</#{temporary_buffer}" # fixfull split

	# 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
	# Tokenizer "script data double escape start" state. Letters are collected
	# (lowercased) in temporary_buffer and returned as typed. Whitespace, '/'
	# or '>' ends the word: if it spelled "script" the double escaped state is
	# entered, otherwise the escaped state. Any other character is left
	# unconsumed for the escaped state.
	tok_state_script_data_double_escape_start = ->
		c = txt.charAt(cur++)
		if c in ["\t", "\u000a", "\u000c", ' ', '/', '>']
			tok_state = if temporary_buffer is 'script' then tok_state_script_data_double_escaped else tok_state_script_data_escaped
			return new_character_token c
		if is_uc_alpha(c) or is_lc_alpha(c)
			# yes, really lowercase; an already-lowercase letter is unchanged
			temporary_buffer += c.toLowerCase()
			return new_character_token c
		# Anything else
		tok_state = tok_state_script_data_escaped
		cur -= 1 # Reconsume
		return

	# 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
	# Tokenizer "script data double escaped" state: consumes one character.
	# '-' and '<' are returned and switch to their follow-up states, NUL
	# becomes U+FFFD with a parse error, end of input falls back to the data
	# state with a parse error, and anything else is returned unchanged.
	tok_state_script_data_double_escaped = ->
		switch c = txt.charAt(cur++)
			when '-'
				tok_state = tok_state_script_data_double_escaped_dash
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_double_escaped_less_than_sign
				return new_character_token '<'
			when "\u0000"
				parse_error()
				return new_character_token "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return
		# Anything else
		return new_character_token c

	# 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
	# Tokenizer "script data double escaped dash" state (one '-' seen). A
	# second '-' moves to the dash dash state, '<' to the double escaped
	# less-than sign state, end of input falls back to the data state with a
	# parse error, and everything else returns to the double escaped state.
	tok_state_script_data_double_escaped_dash = ->
		switch c = txt.charAt(cur++)
			when '-'
				tok_state = tok_state_script_data_double_escaped_dash_dash
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_double_escaped_less_than_sign
				return new_character_token '<'
			when "\u0000"
				parse_error()
				tok_state = tok_state_script_data_double_escaped
				return new_character_token "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return
		# Anything else
		tok_state = tok_state_script_data_double_escaped
		return new_character_token c

	# 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
	# Tokenizer "script data double escaped dash dash" state (at least '--'
	# seen). Further dashes stay in this state, '>' returns to the script data
	# state, '<' moves to the double escaped less-than sign state, end of
	# input falls back to the data state with a parse error, and everything
	# else returns to the double escaped state.
	tok_state_script_data_double_escaped_dash_dash = ->
		switch c = txt.charAt(cur++)
			when '-'
				return new_character_token '-'
			when '<'
				tok_state = tok_state_script_data_double_escaped_less_than_sign
				return new_character_token '<'
			when '>'
				tok_state = tok_state_script_data
				return new_character_token '>'
			when "\u0000"
				parse_error()
				tok_state = tok_state_script_data_double_escaped
				return new_character_token "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return
		# Anything else
		tok_state = tok_state_script_data_double_escaped
		return new_character_token c

	# 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
	# Tokenizer "script data double escaped less-than sign" state. '/' clears
	# temporary_buffer, enters the double escape end state and is returned as
	# a character token; anything else is left unconsumed for the double
	# escaped state.
	tok_state_script_data_double_escaped_less_than_sign = ->
		c = txt.charAt(cur++)
		unless c is '/'
			# Anything else
			tok_state = tok_state_script_data_double_escaped
			cur -= 1 # Reconsume
			return
		temporary_buffer = ''
		tok_state = tok_state_script_data_double_escape_end
		return new_character_token '/'

	# 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
	# Tokenizer "script data double escape end" state. Letters are collected
	# (lowercased) in temporary_buffer and returned as typed. Whitespace, '/'
	# or '>' ends the word: if it spelled "script" the (single) escaped state
	# is re-entered, otherwise the double escaped state continues. Any other
	# character is left unconsumed for the double escaped state.
	tok_state_script_data_double_escape_end = ->
		c = txt.charAt(cur++)
		if c in ["\t", "\u000a", "\u000c", ' ', '/', '>']
			tok_state = if temporary_buffer is 'script' then tok_state_script_data_escaped else tok_state_script_data_double_escaped
			return new_character_token c
		if is_uc_alpha(c) or is_lc_alpha(c)
			# yes, really lowercase; an already-lowercase letter is unchanged
			temporary_buffer += c.toLowerCase()
			return new_character_token c
		# Anything else
		tok_state = tok_state_script_data_double_escaped
		cur -= 1 # Reconsume
		return

	# 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
	# Tokenizer "before attribute name" state. Skips whitespace, handles '/'
	# and '>' (returning the finished tag and clearing tok_cur_tag), and
	# otherwise starts a new attribute at the front of tok_cur_tag.attrs_a
	# whose name begins with the consumed character (lowercased; U+FFFD for
	# NUL).
	tok_state_before_attribute_name = ->
		attr_name = null
		c = txt.charAt(cur++)
		if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
			return null
		if c is '/'
			tok_state = tok_state_self_closing_start_tag
			return null
		if c is '>'
			tok_state = tok_state_data
			tmp = tok_cur_tag
			tok_cur_tag = null
			return tmp
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			return null
		# every remaining character starts an attribute name
		if c is "\u0000"
			parse_error()
			attr_name = "\ufffd"
		else if c in ['"', "'", '<', '=']
			parse_error()
			attr_name = c
		else if is_uc_alpha(c)
			attr_name = c.toLowerCase()
		else
			attr_name = c
		tok_cur_tag.attrs_a.unshift [attr_name, '']
		tok_state = tok_state_attribute_name
		return null

	# 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
	# Tokenizer "attribute name" state: consumes one character of the name of
	# the attribute at the front of tok_cur_tag.attrs_a. Returns the finished
	# tag token on '>' (clearing tok_cur_tag), otherwise null after switching
	# state or appending the (lowercased) character to the name.
	tok_state_attribute_name = ->
		c = txt.charAt(cur++)
		if c is '>'
			tok_state = tok_state_data
			tmp = tok_cur_tag
			tok_cur_tag = null
			return tmp
		if c in ["\t", "\n", "\u000c", ' ']
			tok_state = tok_state_after_attribute_name
		else if c is '/'
			tok_state = tok_state_self_closing_start_tag
		else if c is '='
			tok_state = tok_state_before_attribute_value
		else if c is "\u0000"
			parse_error()
			tok_cur_tag.attrs_a[0][0] += "\ufffd"
		else if c in ['"', "'", '<']
			parse_error()
			tok_cur_tag.attrs_a[0][0] += c
		else if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
		else if is_uc_alpha(c)
			tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
		else
			tok_cur_tag.attrs_a[0][0] += c
		return null

	# 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
	# Tokenizer "after attribute name" state (whitespace seen after an
	# attribute name). Skips further whitespace, handles '/', '=' and '>'
	# (returning the current tag token), and otherwise starts a new attribute
	# at the front of tok_cur_tag.attrs_a whose name begins with the consumed
	# character (lowercased; U+FFFD for NUL).
	#
	# Returns the tag token on '>'; in every other case nothing (null or
	# undefined) is returned.
	tok_state_after_attribute_name = ->
		c = txt.charAt(cur++)
		if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
			return
		if c is '/'
			tok_state = tok_state_self_closing_start_tag
			return
		if c is '='
			tok_state = tok_state_before_attribute_value
			return
		if c is '>'
			tok_state = tok_state_data
			return tok_cur_tag
		if is_uc_alpha(c)
			tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
			tok_state = tok_state_attribute_name
			return
		if c is "\u0000"
			parse_error()
			tok_cur_tag.attrs_a.unshift ["\ufffd", '']
			tok_state = tok_state_attribute_name
			return
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			cur -= 1 # reconsume
			return
		if c is '"' or c is "'" or c is '<'
			parse_error()
			# fall through to Anything else
		# Anything else
		tok_cur_tag.attrs_a.unshift [c, '']
		tok_state = tok_state_attribute_name
		# explicit return: without it CoffeeScript would implicitly return the
		# value of the assignment above (a function) as if it were a token
		return null

	# 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	# Tokenizer "before attribute value" state (after '='). Skips whitespace,
	# picks the quoted or unquoted value state from the first character, and
	# returns the finished tag token (clearing tok_cur_tag) when '>' follows
	# the '=' directly. NUL, '>' and end of input are reported as parse
	# errors. Returns null whenever no token is produced.
	tok_state_before_attribute_value = ->
		switch c = txt.charAt(cur++)
			when "\t", "\n", "\u000c", ' '
				return null
			when '"'
				tok_state = tok_state_attribute_value_double_quoted
			when '&'
				# let the unquoted state handle the character reference
				tok_state = tok_state_attribute_value_unquoted
				cur -= 1
			when "'"
				tok_state = tok_state_attribute_value_single_quoted
			when "\u0000"
				parse_error()
				tok_cur_tag.attrs_a[0][1] += "\ufffd"
				tok_state = tok_state_attribute_value_unquoted
			when '>'
				# attribute has '=' but no value
				parse_error()
				tok_state = tok_state_data
				tmp = tok_cur_tag
				tok_cur_tag = null
				return tmp
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
			else
				tok_cur_tag.attrs_a[0][1] += c
				tok_state = tok_state_attribute_value_unquoted
		return null

	# 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
	tok_state_attribute_value_double_quoted = ->
		# Tokenizer "attribute value (double-quoted)" state.
		#
		# Consumes one character and appends it (or its decoded character
		# reference) to the value of the newest attribute, attrs_a[0], until
		# the closing '"' is found. Always returns null.
		switch c = txt.charAt(cur++)
			when '"'
				tok_state = tok_state_after_attribute_value_quoted
			when '&'
				# '"' is the "additional allowed character" for this context
				tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
			when "\u0000"
				parse_error()
				tok_cur_tag.attrs_a[0][1] += "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # reconsume, like the other states in this file
			else
				tok_cur_tag.attrs_a[0][1] += c
		return null

	# 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
	tok_state_attribute_value_single_quoted = ->
		# Tokenizer "attribute value (single-quoted)" state.
		#
		# Consumes one character and appends it (or its decoded character
		# reference) to the value of the newest attribute, attrs_a[0], until
		# the closing "'" is found. Always returns null.
		switch c = txt.charAt(cur++)
			when "'"
				tok_state = tok_state_after_attribute_value_quoted
			when '&'
				# "'" is the "additional allowed character" for this context
				tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
			when "\u0000"
				parse_error()
				tok_cur_tag.attrs_a[0][1] += "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # reconsume, like the other states in this file
			else
				tok_cur_tag.attrs_a[0][1] += c
		return null

	# 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	tok_state_attribute_value_unquoted = ->
		# Tokenizer "attribute value (unquoted)" state.
		#
		# Consumes one character of an unquoted attribute value. Whitespace
		# ends the value, '>' ends the whole tag, anything else is appended to
		# the value of the newest attribute, attrs_a[0].
		#
		# Returns the finished tag token on '>', otherwise null.
		switch c = txt.charAt(cur++)
			when "\t", "\n", "\u000c", ' '
				tok_state = tok_state_before_attribute_name
			when '&'
				# '>' is the "additional allowed character" for this context
				tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
			when '>'
				tok_state = tok_state_data
				tmp = tok_cur_tag
				tok_cur_tag = null
				return tmp
			when "\u0000"
				parse_error()
				tok_cur_tag.attrs_a[0][1] += "\ufffd"
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # reconsume, like the other states in this file
			else
				# these are kept in the value, but the spec flags them as errors
				if c is '"' or c is "'" or c is '<' or c is '=' or c is '`'
					parse_error()
				tok_cur_tag.attrs_a[0][1] += c
		return null

	# 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
	tok_state_after_attribute_value_quoted = ->
		# Tokenizer "after attribute value (quoted)" state.
		#
		# Consumes the character that follows a closing quote. Whitespace, '/'
		# and '>' are the expected continuations; anything else is reported
		# and then re-read as the start of the next attribute.
		#
		# Returns the finished tag token on '>', otherwise null.
		switch c = txt.charAt(cur++)
			when "\t", "\n", "\u000c", ' '
				tok_state = tok_state_before_attribute_name
			when '/'
				tok_state = tok_state_self_closing_start_tag
			when '>'
				tok_state = tok_state_data
				tmp = tok_cur_tag
				tok_cur_tag = null
				return tmp
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # reconsume, like the other states in this file
			else
				# missing whitespace between attributes
				parse_error()
				tok_state = tok_state_before_attribute_name
				cur -= 1 # we didn't handle that char
		return null

	# 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
	tok_state_self_closing_start_tag = ->
		# Tokenizer "self-closing start tag" state (reached after a '/').
		#
		# '>' marks the current tag as self-closing and returns it. Any other
		# input (including EOF) is a parse error; the character is put back
		# and handled by the next state. Those paths return nothing.
		switch c = txt.charAt(cur++)
			when '>'
				tok_cur_tag.flag 'self-closing', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
			else
				parse_error()
				tok_state = tok_state_before_attribute_name
		# both error paths reconsume the character they just read
		cur -= 1
		return

	# 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
	# WARNING: put a comment token in tok_cur_tag before setting this state
	tok_state_bogus_comment = ->
		# Tokenizer "bogus comment" state.
		#
		# Takes everything from the cursor up to (not including) the next '>',
		# or to the end of the input when there is none, appends it to the
		# text of the comment token already in tok_cur_tag, and returns that
		# token. The '>' itself is consumed.
		next_gt = txt.indexOf '>', cur
		if next_gt >= 0
			val = txt.substring cur, next_gt
			cur = next_gt + 1
		else
			val = txt.substring cur
			cur = txt.length
		# NUL characters become U+FFFD inside comments
		tok_cur_tag.text += val.replace /\u0000/g, "\ufffd"
		tok_state = tok_state_data
		return tok_cur_tag

	# 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
	tok_state_markup_declaration_open = ->
		# Tokenizer "markup declaration open" state (reached after "<!").
		#
		# Looks ahead without consuming a single character first:
		#   "--"                          -> start of a comment
		#   "doctype" (case-insensitive)  -> DOCTYPE
		#   "[CDATA[" (exact case), only when the adjusted current node
		#             exists and is not in the HTML namespace -> CDATA section
		#   anything else                 -> parse error, bogus comment
		# Never returns a token.
		if txt.substr(cur, 2) is '--'
			cur += 2
			tok_cur_tag = new_comment_token ''
			tok_state = tok_state_comment_start
		else if txt.substr(cur, 7).toLowerCase() is 'doctype'
			cur += 7
			tok_state = tok_state_doctype
		else
			acn = adjusted_current_node()
			if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
				cur += 7
				tok_state = tok_state_cdata_section
			else
				parse_error()
				tok_cur_tag = new_comment_token ''
				tok_state = tok_state_bogus_comment
		return

	# 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
	tok_state_comment_start = ->
		# Tokenizer "comment start" state (reached right after "<!--").
		#
		# Consumes the first character of the comment body. Returns the
		# comment token in tok_cur_tag when the comment ends immediately
		# ('>' or EOF, both parse errors); otherwise returns null.
		switch c = txt.charAt(cur++)
			when '-'
				tok_state = tok_state_comment_start_dash
			when "\u0000"
				parse_error()
				# NUL belongs to the comment's data as U+FFFD; it must not be
				# emitted as a separate character token
				tok_cur_tag.text += "\ufffd"
				tok_state = tok_state_comment
			when '>'
				parse_error()
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.text += c
				tok_state = tok_state_comment
		return null

	# 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
	tok_state_comment_start_dash = ->
		# Tokenizer "comment start dash" state (after "<!--" and one '-').
		#
		# A second '-' moves to the comment-end state. '>' or EOF end the
		# comment with a parse error and return the comment token. Anything
		# else becomes comment text, with the pending '-' put in front of it.
		c = txt.charAt(cur++)
		if c is '-'
			tok_state = tok_state_comment_end
			return null
		if c is '>'
			parse_error()
			tok_state = tok_state_data
			return tok_cur_tag
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			cur -= 1 # Reconsume
			return tok_cur_tag
		if c is "\u0000"
			parse_error()
			tok_cur_tag.text += "-\ufffd"
		else
			tok_cur_tag.text += "-#{c}"
		tok_state = tok_state_comment
		return null

	# 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
	tok_state_comment = ->
		# Tokenizer "comment" state: the body of a comment.
		#
		# Appends one character to the comment text, replacing NUL with
		# U+FFFD. A '-' may start the terminator, so it is held back and
		# handled by the comment-end-dash state. EOF is a parse error that
		# returns the comment collected so far; all other paths return null.
		c = txt.charAt(cur++)
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			cur -= 1 # Reconsume
			return tok_cur_tag
		if c is '-'
			tok_state = tok_state_comment_end_dash
		else if c is "\u0000"
			parse_error()
			tok_cur_tag.text += "\ufffd"
		else
			tok_cur_tag.text += c
		return null

	# 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
	tok_state_comment_end_dash = ->
		# Tokenizer "comment end dash" state (one '-' seen inside a comment).
		#
		# A second '-' moves to the comment-end state. Otherwise the held-back
		# '-' plus the new character go into the comment text and tokenizing
		# continues in the comment state. EOF is a parse error that returns
		# the comment token; all other paths return null.
		c = txt.charAt(cur++)
		if c is '-'
			tok_state = tok_state_comment_end
			return null
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			cur -= 1 # Reconsume
			return tok_cur_tag
		if c is "\u0000"
			parse_error()
			tok_cur_tag.text += "-\ufffd"
		else
			tok_cur_tag.text += "-#{c}"
		tok_state = tok_state_comment
		return null

	# 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
	tok_state_comment_end = ->
		# Tokenizer "comment end" state ("--" seen inside a comment).
		#
		# '>' completes the comment and returns its token. Every other input
		# is a parse error: '!' goes to the comment-end-bang state, an extra
		# '-' is appended to the text, EOF returns the comment as-is, and
		# anything else puts the held-back "--" plus the character into the
		# text and resumes the comment state.
		c = txt.charAt(cur++)
		if c is '>'
			tok_state = tok_state_data
			return tok_cur_tag
		# everything below is a parse error
		parse_error()
		if c is '' # EOF
			tok_state = tok_state_data
			cur -= 1 # Reconsume
			return tok_cur_tag
		if c is '!'
			tok_state = tok_state_comment_end_bang
		else if c is '-'
			tok_cur_tag.text += '-'
		else
			if c is "\u0000"
				tok_cur_tag.text += "--\ufffd"
			else
				tok_cur_tag.text += "--#{c}"
			tok_state = tok_state_comment
		return null

	# 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
	tok_state_comment_end_bang = ->
		# Tokenizer "comment end bang" state ("--!" seen inside a comment).
		#
		# '>' completes the comment and returns its token. EOF is a parse
		# error that also returns the token. Otherwise the held-back "--!"
		# is put into the comment text and tokenizing continues; those paths
		# return null.
		switch c = txt.charAt(cur++)
			when '-'
				# Append only "--!". The '-' just read is not text yet: the
				# comment-end-dash state adds it itself if the comment goes on,
				# so appending it here as well would duplicate it.
				tok_cur_tag.text += "--!"
				tok_state = tok_state_comment_end_dash
			when '>'
				tok_state = tok_state_data
				return tok_cur_tag
			when "\u0000"
				parse_error()
				tok_cur_tag.text += "--!\ufffd"
				tok_state = tok_state_comment
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.text += "--!#{c}"
				tok_state = tok_state_comment
		return null

	# 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
	tok_state_doctype = ->
		# Tokenizer "DOCTYPE" state (reached right after "<!doctype").
		#
		# Whitespace is expected and leads to the before-DOCTYPE-name state.
		# Anything else is a parse error: at EOF a new, empty DOCTYPE token
		# flagged force-quirks is returned; otherwise the character is put
		# back and read again by the before-DOCTYPE-name state.
		c = txt.charAt(cur++)
		if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
			tok_state = tok_state_before_doctype_name
			return null
		parse_error()
		if c is '' # EOF
			tok_state = tok_state_data
			el = new_doctype_token ''
			el.flag 'force-quirks', true
			cur -= 1 # Reconsume
			return el
		tok_state = tok_state_before_doctype_name
		cur -= 1 # Reconsume
		return null

	# 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
	tok_state_before_doctype_name = ->
		# Tokenizer "before DOCTYPE name" state.
		#
		# Skips whitespace, then creates the DOCTYPE token from the first
		# character of its name (lowercased; NUL becomes U+FFFD). A '>' or
		# EOF before any name is a parse error and returns an empty DOCTYPE
		# token flagged force-quirks.
		c = txt.charAt(cur++)
		switch
			when c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
				return
			when is_uc_alpha(c)
				tok_cur_tag = new_doctype_token c.toLowerCase()
				tok_state = tok_state_doctype_name
				return
			when c is "\u0000"
				parse_error()
				tok_cur_tag = new_doctype_token "\ufffd"
				tok_state = tok_state_doctype_name
				return
			when c is '>'
				parse_error()
				el = new_doctype_token ''
				el.flag 'force-quirks', true
				tok_state = tok_state_data
				return el
			when c is '' # EOF
				parse_error()
				tok_state = tok_state_data
				el = new_doctype_token ''
				el.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return el
		# Anything else: first character of the name, taken as-is
		tok_cur_tag = new_doctype_token c
		tok_state = tok_state_doctype_name
		return null

	# 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
	tok_state_doctype_name = ->
		# Tokenizer "DOCTYPE name" state.
		#
		# Appends one character to the DOCTYPE token's name (lowercased; NUL
		# becomes U+FFFD). Whitespace ends the name, '>' returns the token,
		# and EOF is a parse error that returns it flagged force-quirks.
		c = txt.charAt(cur++)
		switch
			when c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
				tok_state = tok_state_after_doctype_name
				return
			when c is '>'
				tok_state = tok_state_data
				return tok_cur_tag
			when is_uc_alpha(c)
				tok_cur_tag.name += c.toLowerCase()
				return
			when c is "\u0000"
				parse_error()
				tok_cur_tag.name += "\ufffd"
				return
			when c is '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
		# Anything else
		tok_cur_tag.name += c
		return null

	# 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
	tok_state_after_doctype_name = ->
		# Tokenizer "after DOCTYPE name" state.
		#
		# Skips whitespace. '>' returns the DOCTYPE token. Otherwise the next
		# six characters (starting with the one just read) must spell
		# "public" or "system", in any letter case; anything else is a parse
		# error that flags force-quirks and enters the bogus-DOCTYPE state.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				return
			when '>'
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
		# Anything else: cur is already one past the first keyword character,
		# so only five more characters are consumed on a match
		switch txt.substr(cur - 1, 6).toLowerCase()
			when 'public'
				cur += 5
				tok_state = tok_state_after_doctype_public_keyword
				return
			when 'system'
				cur += 5
				tok_state = tok_state_after_doctype_system_keyword
				return
		parse_error()
		tok_cur_tag.flag 'force-quirks', true
		tok_state = tok_state_bogus_doctype
		return null

	# 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
	tok_state_after_doctype_public_keyword = ->
		# Tokenizer "after DOCTYPE public keyword" state.
		#
		# Whitespace is the expected separator. A quote right after the
		# keyword still starts the public identifier, but is a parse error.
		# '>', EOF and anything else are parse errors that flag force-quirks.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				tok_state = tok_state_before_doctype_public_identifier
				return
			when '"'
				parse_error()
				tok_cur_tag.public_identifier = ''
				tok_state = tok_state_doctype_public_identifier_double_quoted
				return
			when "'"
				parse_error()
				tok_cur_tag.public_identifier = ''
				tok_state = tok_state_doctype_public_identifier_single_quoted
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_bogus_doctype
				return null

	# 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
	tok_state_before_doctype_public_identifier = ->
		# Tokenizer "before DOCTYPE public identifier" state.
		#
		# Skips whitespace after the PUBLIC keyword. An opening quote starts
		# the public identifier (set to the empty string, i.e. present but
		# empty). '>', EOF and anything else are parse errors that flag
		# force-quirks.
		#
		# Returns the DOCTYPE token on '>' and EOF, nothing/null otherwise.
		c = txt.charAt(cur++)
		if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
			return
		if c is '"'
			# A quote after whitespace is the valid form, so unlike in the
			# after-public-keyword state this is not a parse error
			tok_cur_tag.public_identifier = ''
			tok_state = tok_state_doctype_public_identifier_double_quoted
			return
		if c is "'"
			# also valid; not a parse error
			tok_cur_tag.public_identifier = ''
			tok_state = tok_state_doctype_public_identifier_single_quoted
			return
		if c is '>'
			parse_error()
			tok_cur_tag.flag 'force-quirks', true
			tok_state = tok_state_data
			return tok_cur_tag
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag 'force-quirks', true
			cur -= 1 # Reconsume
			return tok_cur_tag
		# Anything else
		parse_error()
		tok_cur_tag.flag 'force-quirks', true
		tok_state = tok_state_bogus_doctype
		return null


	# 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
	tok_state_doctype_public_identifier_double_quoted = ->
		# Tokenizer "DOCTYPE public identifier (double-quoted)" state.
		#
		# Appends characters to the token's public_identifier until the
		# closing '"'. NUL is stored as U+FFFD. A '>' or EOF inside the
		# quotes is a parse error that returns the token flagged force-quirks.
		switch c = txt.charAt(cur++)
			when '"'
				tok_state = tok_state_after_doctype_public_identifier
				return
			when "\u0000"
				parse_error()
				tok_cur_tag.public_identifier += "\ufffd"
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.public_identifier += c
				return null

	# 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
	tok_state_doctype_public_identifier_single_quoted = ->
		# Tokenizer "DOCTYPE public identifier (single-quoted)" state.
		#
		# Appends characters to the token's public_identifier until the
		# closing "'". NUL is stored as U+FFFD. A '>' or EOF inside the
		# quotes is a parse error that returns the token flagged force-quirks.
		switch c = txt.charAt(cur++)
			when "'"
				tok_state = tok_state_after_doctype_public_identifier
				return
			when "\u0000"
				parse_error()
				tok_cur_tag.public_identifier += "\ufffd"
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.public_identifier += c
				return null

	# 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
	tok_state_after_doctype_public_identifier = ->
		# Tokenizer "after DOCTYPE public identifier" state.
		#
		# '>' returns the DOCTYPE token. Whitespace may be followed by a
		# system identifier. A quote directly after the public identifier
		# starts the system identifier but is a parse error. EOF and anything
		# else are parse errors that flag force-quirks.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				tok_state = tok_state_between_doctype_public_and_system_identifiers
				return
			when '>'
				tok_state = tok_state_data
				return tok_cur_tag
			when '"'
				parse_error()
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_double_quoted
				return
			when "'"
				parse_error()
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_single_quoted
				return
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_bogus_doctype
				return null

	# 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
	tok_state_between_doctype_public_and_system_identifiers = ->
		# Tokenizer "between DOCTYPE public and system identifiers" state.
		#
		# Skips whitespace after the public identifier. '>' returns the
		# DOCTYPE token. An opening quote starts the system identifier (set to
		# the empty string, i.e. present but empty). EOF and anything else are
		# parse errors that flag force-quirks.
		#
		# Returns the DOCTYPE token on '>' and EOF, nothing/null otherwise.
		c = txt.charAt(cur++)
		if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
			return
		if c is '>'
			tok_state = tok_state_data
			return tok_cur_tag
		if c is '"'
			# Whitespace separated the two identifiers, which is the valid
			# form, so this is not a parse error
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_double_quoted
			return
		if c is "'"
			# also valid; not a parse error
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_single_quoted
			return
		if c is '' # EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag 'force-quirks', true
			cur -= 1 # Reconsume
			return tok_cur_tag
		# Anything else
		parse_error()
		tok_cur_tag.flag 'force-quirks', true
		tok_state = tok_state_bogus_doctype
		return null

	# 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
	tok_state_after_doctype_system_keyword = ->
		# Tokenizer "after DOCTYPE system keyword" state.
		#
		# Whitespace is the expected separator. A quote right after the
		# keyword still starts the system identifier, but is a parse error.
		# '>', EOF and anything else are parse errors that flag force-quirks.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				tok_state = tok_state_before_doctype_system_identifier
				return
			when '"'
				parse_error()
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_double_quoted
				return
			when "'"
				parse_error()
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_single_quoted
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_bogus_doctype
				return null

	# 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
	tok_state_before_doctype_system_identifier = ->
		# Tokenizer "before DOCTYPE system identifier" state.
		#
		# Skips whitespace after the SYSTEM keyword. An opening quote starts
		# the system identifier (no parse error here). '>', EOF and anything
		# else are parse errors that flag force-quirks.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				return
			when '"'
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_double_quoted
				return
			when "'"
				tok_cur_tag.system_identifier = ''
				tok_state = tok_state_doctype_system_identifier_single_quoted
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_bogus_doctype
				return null

	# 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
	tok_state_doctype_system_identifier_double_quoted = ->
		# Tokenizer "DOCTYPE system identifier (double-quoted)" state.
		#
		# Appends characters to the token's system_identifier until the
		# closing '"'. NUL is stored as U+FFFD. A '>' or EOF inside the
		# quotes is a parse error that returns the token flagged force-quirks.
		switch c = txt.charAt(cur++)
			when '"'
				tok_state = tok_state_after_doctype_system_identifier
				return
			when "\u0000"
				parse_error()
				tok_cur_tag.system_identifier += "\ufffd"
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.system_identifier += c
				return null

	# 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
	tok_state_doctype_system_identifier_single_quoted = ->
		# Tokenizer "DOCTYPE system identifier (single-quoted)" state.
		#
		# Appends characters to the token's system_identifier until the
		# closing "'". NUL is stored as U+FFFD. A '>' or EOF inside the
		# quotes is a parse error that returns the token flagged force-quirks.
		switch c = txt.charAt(cur++)
			when "'"
				tok_state = tok_state_after_doctype_system_identifier
				return
			when "\u0000"
				parse_error()
				tok_cur_tag.system_identifier += "\ufffd"
				return
			when '>'
				parse_error()
				tok_cur_tag.flag 'force-quirks', true
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				tok_cur_tag.system_identifier += c
				return null

	# 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
	tok_state_after_doctype_system_identifier = ->
		# Tokenizer "after DOCTYPE system identifier" state.
		#
		# Skips whitespace; '>' returns the DOCTYPE token. EOF is a parse
		# error that returns the token flagged force-quirks. Any other
		# character is a parse error that enters the bogus-DOCTYPE state.
		switch c = txt.charAt(cur++)
			when "\t", "\u000a", "\u000c", ' '
				return
			when '>'
				tok_state = tok_state_data
				return tok_cur_tag
			when '' # EOF
				parse_error()
				tok_state = tok_state_data
				tok_cur_tag.flag 'force-quirks', true
				cur -= 1 # Reconsume
				return tok_cur_tag
			else
				parse_error()
				# deliberately NOT flagged force-quirks on this path
				tok_state = tok_state_bogus_doctype
				return null

	# 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
	tok_state_bogus_doctype = ->
		# Tokenizer "bogus DOCTYPE" state.
		#
		# Discards characters one per call until '>' or EOF, then returns the
		# DOCTYPE token held in tok_cur_tag. Returns null while discarding.
		switch c = txt.charAt(cur++)
			when '>'
				tok_state = tok_state_data
			when '' # EOF
				tok_state = tok_state_data
				cur -= 1 # Reconsume
			else
				return null
		return tok_cur_tag

	# 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
	tok_state_cdata_section = ->
		# Tokenizer "CDATA section" state.
		#
		# Takes everything from the cursor up to the next "]]>" (or to the end
		# of the input when there is none), skips the terminator, and goes
		# back to the data state. Returns a single character token holding
		# that text, or null when the section is empty.
		tok_state = tok_state_data
		next_gt = txt.indexOf ']]>', cur
		if next_gt >= 0
			val = txt.substring cur, next_gt
			cur = next_gt + 3
		else
			val = txt.substring cur
			cur = txt.length
		return null if val.length is 0
		return new_character_token val # fixfull split

	# 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
	# Don't set this as a state, just call it
	# returns a string (NOT a text node)
	parse_character_reference = (allowed_char = null, in_attr = false) ->
		# Tries to consume a character reference. Call this with cur pointing
		# at the character right after the '&'.
		#
		# allowed_char: the "additional allowed character" of the calling
		#               context (e.g. the closing quote of an attribute
		#               value), or null. A reference is never started by it.
		# in_attr:      true when called from an attribute value; enables the
		#               legacy exceptions for unterminated named references.
		#
		# Returns the decoded string and advances cur past the reference, or
		# returns '&' and leaves cur untouched when nothing could be decoded.
		if cur >= txt.length
			return '&'
		switch c = txt.charAt(cur)
			when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
				# explicitly not a parse error
				return '&'
			when ';'
				# there has to be "one or more" alnums between & and ; to be a parse error
				return '&'
			when '#'
				# numeric reference: &#123; or &#x7b;
				if cur + 1 >= txt.length
					# "&#" at the very end: no digits, which is a parse error
					parse_error()
					return '&'
				if txt.charAt(cur + 1).toLowerCase() is 'x'
					base = 16
					charset = hex_chars
					start = cur + 2
				else
					charset = digits
					start = cur + 1
					base = 10
				# count the digits that follow
				i = 0
				while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
					i += 1
				if i is 0
					# no digits: nothing is consumed, and it is a parse error
					parse_error()
					return '&'
				cur = start + i
				if txt.charAt(start + i) is ';'
					cur += 1
				else
					parse_error()
				code_point = txt.substr(start, i)
				while code_point.charAt(0) is '0' and code_point.length > 1
					code_point = code_point.substr 1
				code_point = parseInt(code_point, base)
				if unicode_fixes[code_point]?
					# code points that have a fixed replacement character
					parse_error()
					return unicode_fixes[code_point]
				else
					if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
						# surrogates and out-of-range values
						parse_error()
						return "\ufffd"
					else
						# control characters and noncharacters are returned as-is, but reported
						if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
							parse_error()
						return from_code_point code_point
				return
			else
				# named reference: count the leading alphanumerics (at most 31).
				# The explicit bounds check matters: charAt() returns '' past
				# the end of txt and indexOf('') is 0, so without it the end
				# of the input would be counted as alphanumeric.
				i = 0
				while i < 31 and cur + i < txt.length and alnum.indexOf(txt.charAt(cur + i)) > -1
					i += 1
				if i is 0
					# exit early, because parse_error() below needs at least one alnum
					return '&'
				if txt.charAt(cur + i) is ';'
					i += 1 # include ';' terminator in value
					decoded = decode_named_char_ref txt.substr(cur, i)
					if decoded?
						cur += i
						return decoded
					parse_error()
					return '&'
				else
					# no ';' terminator (only legacy char refs)
					max = i
					# guard: the range [2..max] would count DOWN when max < 2
					if max >= 2
						for i in [2..max] # no prefix matches, so ok to check shortest first
							c = legacy_char_refs[txt.substr(cur, i)]
							if c?
								if in_attr
									if txt.charAt(cur + i) is '='
										# "because some legacy user agents will
										# misinterpret the markup in those cases"
										parse_error()
										return '&'
									if cur + i < txt.length and alnum.indexOf(txt.charAt(cur + i)) > -1
										# this makes attributes forgiving about url args
										return '&'
								# ok, and besides the weird exceptions for attributes...
								# return the matching char
								cur += i # consume entity chars
								parse_error() # because no terminating ";"
								return c
					parse_error()
					return '&'
		return # never reached

	eat_next_token_if_newline = ->
		# Runs the tokenizer until it produces the next token and keeps that
		# token consumed only if it is a "newline" text token. Otherwise the
		# cursor is moved back to where it was, so the token is read again
		# later. Returns nothing.
		#
		# A line feed always counts. A carriage return counts only when
		# exactly one character was consumed, i.e. it was a literal character
		# and not the result of a character reference.
		old_cur = cur
		t = null
		t = tok_state() while not t?
		if t.type is TYPE_TEXT
			if t.text is "\u000a"
				return
			if t.text is "\u000d" and cur - old_cur is 1
				return
		# not a "newline": un-consume it
		cur = old_cur
		return

	# tree constructor initialization
	# see comments on TYPE_TAG/etc for the structure of this data
	# input text and read position shared by all tokenizer states
	txt = args.html
	cur = 0
	# root node; parse_html returns its children
	doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
	doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
	open_els = []
	afe = [] # active formatting elements
	template_ins_modes = []
	ins_mode = ins_mode_initial
	original_ins_mode = ins_mode # TODO check spec
	flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
	flag_frameset_ok = true
	flag_parsing = true
	flag_foster_parenting = false
	form_element_pointer = null
	temporary_buffer = null
	pending_table_character_tokens = []
	head_element_pointer = null
	flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
	context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
	prev_node_id = 0 # just for debugging

	# tokenizer initialization
	tok_state = tok_state_data

	# text pre-processing
	# FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
	# NUL becomes U+FFFD and every CRLF / lone CR becomes LF before
	# tokenizing starts. The CRLF pass must run before the lone-CR pass.
	txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
	txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
	txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this

	# process input
	# http://www.w3.org/TR/html5/syntax.html#tree-construction
	parse_main_loop = ->
		# Runs the tokenizer for as long as flag_parsing is set, and hands
		# every token it produces to process_token. A state function that
		# returns null/undefined produced no token on that step.
		while flag_parsing
			t = tok_state()
			if t?
				process_token t
				# fixfull parse error if has self-closing flag, but it wasn't acknowledged
		# Explicit return: a loop in tail position is an expression in
		# CoffeeScript, so without this the function would build and return
		# an array with one entry per iteration, which nobody uses.
		return
	parse_main_loop()
	return doc.children

# Serializes a list of nodes into one comma-separated string.
#
# els:      array of objects that have a serialize(shallow, show_ids) method
# shallow:  passed through to each element's serialize()
# show_ids: passed through to each element's serialize()
#
# Returns '' for an empty list.
serialize_els = (els, shallow, show_ids) ->
	return ("#{t.serialize shallow, show_ids}" for t in els).join ','

# Public API of this module: the parser entry point, the debug-log helpers,
# and the constants callers need to interpret the returned nodes.
public_api =
	parse_html: parse_html
	debug_log_reset: debug_log_reset
	debug_log_each: debug_log_each
	TYPE_TAG: TYPE_TAG
	TYPE_TEXT: TYPE_TEXT
	TYPE_COMMENT: TYPE_COMMENT
	TYPE_DOCTYPE: TYPE_DOCTYPE
	NS_HTML: NS_HTML
	NS_MATHML: NS_MATHML
	NS_SVG: NS_SVG
	QUIRKS_NO: QUIRKS_NO
	QUIRKS_LIMITED: QUIRKS_LIMITED
	QUIRKS_YES: QUIRKS_YES
for exported_name, exported_value of public_api
	module.exports[exported_name] = exported_value