X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;f=parse-html.coffee;h=adb9babbce8be63161825da256a11085b796191b;hb=06cc39431c9f7b4b4c10ae23be5652aca453238b;hp=9911bc0bfc20d3b32977df53439dda01b32dbfbd;hpb=89d356561aa91ca63239921fd8fbdbc04361aa62;p=peach-html5-editor.git

diff --git a/parse-html.coffee b/parse-html.coffee
index 9911bc0..adb9bab 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -24,7 +24,7 @@
 #
 # Deviations from that spec:
 #
-#   Purposeful: search this file for "WTAG"
+#   Purposeful: search this file for "WHATWG"
 #
 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
 
@@ -84,6 +84,11 @@ NS_HTML = 1
 NS_MATHML = 2
 NS_SVG = 3
 
+# quirks mode constants
+QUIRKS_NO = 1
+QUIRKS_LIMITED = 2
+QUIRKS_YES = 3
+
 g_debug_log = []
 debug_log_reset = ->
 	g_debug_log = []
@@ -112,7 +117,7 @@ class Node
 			@id = "#{++prev_node_id}"
 	acknowledge_self_closing: ->
 		if @token?
-			@token.flag 'did_self_close'
+			@token.flag 'did_self_close', true
 		else
 			@flag 'did_self_close', true
 	flag: (key, value = null) ->
@@ -249,6 +254,64 @@ unicode_fixes[0x9C] = "\u0153"
 unicode_fixes[0x9E] = "\u017E"
 unicode_fixes[0x9F] = "\u0178"
 
+quirks_yes_pi_prefixes = [
+	"+//silmaril//dtd html pro v0r11 19970101//"
+	"-//as//dtd html 3.0 aswedit + extensions//"
+	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
+	"-//ietf//dtd html 2.0 level 1//"
+	"-//ietf//dtd html 2.0 level 2//"
+	"-//ietf//dtd html 2.0 strict level 1//"
+	"-//ietf//dtd html 2.0 strict level 2//"
+	"-//ietf//dtd html 2.0 strict//"
+	"-//ietf//dtd html 2.0//"
+	"-//ietf//dtd html 2.1e//"
+	"-//ietf//dtd html 3.0//"
+	"-//ietf//dtd html 3.2 final//"
+	"-//ietf//dtd html 3.2//"
+	"-//ietf//dtd html 3//"
+	"-//ietf//dtd html level 0//"
+	"-//ietf//dtd html level 1//"
+	"-//ietf//dtd html level 2//"
+	"-//ietf//dtd html level 3//"
+	"-//ietf//dtd html strict level 0//"
+	"-//ietf//dtd html strict level 1//"
+	"-//ietf//dtd html strict level 2//"
+	"-//ietf//dtd html strict level 3//"
+	"-//ietf//dtd html strict//"
+	"-//ietf//dtd html//"
+	"-//metrius//dtd metrius presentational//"
+	"-//microsoft//dtd internet explorer 2.0 html strict//"
+	"-//microsoft//dtd internet explorer 2.0 html//"
+	"-//microsoft//dtd internet explorer 2.0 tables//"
+	"-//microsoft//dtd internet explorer 3.0 html strict//"
+	"-//microsoft//dtd internet explorer 3.0 html//"
+	"-//microsoft//dtd internet explorer 3.0 tables//"
+	"-//netscape comm. corp.//dtd html//"
+	"-//netscape comm. corp.//dtd strict html//"
+	"-//o'reilly and associates//dtd html 2.0//"
+	"-//o'reilly and associates//dtd html extended 1.0//"
+	"-//o'reilly and associates//dtd html extended relaxed 1.0//"
+	"-//sq//dtd html 2.0 hotmetal + extensions//"
+	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
+	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
+	"-//spyglass//dtd html 2.0 extended//"
+	"-//sun microsystems corp.//dtd hotjava html//"
+	"-//sun microsystems corp.//dtd hotjava strict html//"
+	"-//w3c//dtd html 3 1995-03-24//"
+	"-//w3c//dtd html 3.2 draft//"
+	"-//w3c//dtd html 3.2 final//"
+	"-//w3c//dtd html 3.2//"
+	"-//w3c//dtd html 3.2s draft//"
+	"-//w3c//dtd html 4.0 frameset//"
+	"-//w3c//dtd html 4.0 transitional//"
+	"-//w3c//dtd html experimental 19960712//"
+	"-//w3c//dtd html experimental 970421//"
+	"-//w3c//dtd w3 html//"
+	"-//w3o//dtd w3 html 3.0//"
+	"-//webtechs//dtd mozilla html 2.0//"
+	"-//webtechs//dtd mozilla html//"
+]
+
 # These are the character references that don't need a terminating semicolon
 # min length: 2, max: 6, none are a prefix of any other.
 legacy_char_refs = {
@@ -340,14 +403,17 @@ special_elements = {
 	h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 	header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 	img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
-	listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
-	noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
-	ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
-	script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
-	style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
-	template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
-	thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
-	wbr:NS_HTML, xmp:NS_HTML,
+	listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
+
+	menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
+
+	meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
+	noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
+	plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
+	select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
+	table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
+	textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
+	tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
 
 	# MathML:
 	mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
@@ -465,7 +531,7 @@ svg_attribute_fixes = {
 	diffuseconstant: 'diffuseConstant'
 	edgemode: 'edgeMode'
 	externalresourcesrequired: 'externalResourcesRequired'
-	filterres: 'filterRes'
+	# WHATWG removes this: filterres: 'filterRes'
 	filterunits: 'filterUnits'
 	glyphref: 'glyphRef'
 	gradienttransform: 'gradientTransform'
@@ -517,6 +583,20 @@ svg_attribute_fixes = {
 	ychannelselector: 'yChannelSelector'
 	zoomandpan: 'zoomAndPan'
 }
+foreign_attr_fixes = {
+	'xlink:actuate': 'xlink actuate'
+	'xlink:arcrole': 'xlink arcrole'
+	'xlink:href': 'xlink href'
+	'xlink:role': 'xlink role'
+	'xlink:show': 'xlink show'
+	'xlink:title': 'xlink title'
+	'xlink:type': 'xlink type'
+	'xml:base': 'xml base'
+	'xml:lang': 'xml lang'
+	'xml:space': 'xml space'
+	'xmlns': 'xmlns'
+	'xmlns:xlink': 'xmlns xlink'
+}
 adjust_mathml_attributes = (t) ->
 	for a in t.attrs_a
 		if a[0] is 'definitionurl'
@@ -529,6 +609,9 @@ adjust_svg_attributes = (t) ->
 	return
 adjust_foreign_attributes = (t) ->
 	# fixfull
+	for a in t.attrs_a
+		if foreign_attr_fixes[a[0]]?
+			a[0] = foreign_attr_fixes[a[0]]
 	return
 
 # decode_named_char_ref()
@@ -585,18 +668,29 @@ parse_html = (args) ->
 		else
 			console.log "Parse error at character #{cur} of #{txt.length}"
 
+	# http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
+	# "Noah's Ark clause" but with three
 	afe_push = (new_el) ->
 		matches = 0
 		for el, i in afe
+			if el.type is TYPE_AFE_MARKER
+				break
 			if el.name is new_el.name and el.namespace is new_el.namespace
+				attrs_match = true
 				for k, v of el.attrs
-					continue unless new_el.attrs[k] is v
-				for k, v of new_el.attrs
-					continue unless el.attrs[k] is v
-				matches += 1
-				if matches is 3
-					afe.splice i, 1
-					break
+					unless new_el.attrs[k] is v
+						attrs_match = false
+						break
+				if attrs_match
+					for k, v of new_el.attrs
+						unless el.attrs[k] is v
+							attrs_match = false
+							break
+				if attrs_match
+					matches += 1
+					if matches is 3
+						afe.splice i, 1
+						break
 		afe.unshift new_el
 	afe_push_marker = ->
 		afe.unshift new_afe_marker()
@@ -606,33 +700,33 @@ parse_html = (args) ->
 
 	# But first... the helpers
 	template_tag_is_open = ->
-		for t in open_els
-			if t.name is 'template' and t.namespace is NS_HTML
+		for el in open_els
+			if el.name is 'template' and el.namespace is NS_HTML
 				return true
 		return false
 	is_in_scope_x = (tag_name, scope, namespace) ->
-		for t in open_els
-			if t.name is tag_name and (namespace is null or namespace is t.namespace)
+		for el in open_els
+			if el.name is tag_name and (namespace is null or namespace is el.namespace)
 				return true
-			if scope[t.name] is t.namespace
+			if scope[el.name] is el.namespace
 				return false
 		return false
 	is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
-		for t in open_els
-			if t.name is tag_name and (namespace is null or namespace is t.namespace)
+		for el in open_els
+			if el.name is tag_name and (namespace is null or namespace is el.namespace)
 				return true
-			if scope[t.name] is t.namespace
+			if scope[el.name] is el.namespace
 				return false
-			if scope2[t.name] is t.namespace
+			if scope2[el.name] is el.namespace
 				return false
 		return false
 	standard_scopers = {
 		applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
 		td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
-		template: NS_HTML, mi: NS_MATHML,
+		template: NS_HTML,
 
-		mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
-		'annotation-xml': NS_MATHML,
+		mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
+		mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
 
 		foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 	}
@@ -876,16 +970,46 @@ parse_html = (args) ->
 		debug_log "tree: #{serialize_els doc.children, false, true}"
 		debug_log "open_els: #{serialize_els open_els, true, true}"
 		debug_log "afe: #{serialize_els afe, true, true}"
+# this block implements the W3C spec
+#		# 1. If the current node is an HTML element whose tag name is subject,
+#		# then run these substeps:
+#		#
+#		# 1. Let element be the current node.
+#		#
+#		# 2. Pop element off the stack of open elements.
+#		#
+#		# 3. If element is also in the list of active formatting elements,
+#		# remove the element from the list.
+#		#
+#		# 4. Abort the adoption agency algorithm.
+#		if open_els[0].name is subject and open_els[0].namespace is NS_HTML
+#			el = open_els.shift()
+#			# remove it from the list of active formatting elements (if found)
+#			for t, i in afe
+#				if t is el
+#					afe.splice i, 1
+#					break
+#			debug_log "aaa: starting off with subject on top of stack, exiting"
+#			return
+# WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
+		# If the current node is an HTML element whose tag name is subject, and
+		# the current node is not in the list of active formatting elements,
+		# then pop the current node off the stack of open elements, and abort
+		# these steps.
 		if open_els[0].name is subject and open_els[0].namespace is NS_HTML
-			el = open_els[0]
-			open_els.shift()
+			debug_log "aaa: starting off with subject on top of stack, exiting"
 			# remove it from the list of active formatting elements (if found)
-			for t, i in afe
-				if t is el
-					afe.splice i, 1
+			in_afe = false
+			for el, i in afe
+				if el is open_els[0]
+					in_afe = true
 					break
-			debug_log "aaa: starting off with subject on top of stack, exiting"
-			return
+			unless in_afe
+				debug_log "aaa: ...and not in afe, aaa done"
+				open_els.shift()
+				return
+			# fall through
+# END WHATWG
 		outer = 0
 		loop
 			if outer >= 8
@@ -1193,7 +1317,7 @@ parse_html = (args) ->
 			ins_mode t
 			return
 		if is_mathml_text_integration_point(acn)
-			if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
+			if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
 				ins_mode t
 				return
 			if t.type is TYPE_TEXT
@@ -1369,6 +1493,35 @@ parse_html = (args) ->
 
 	# 8.2.5.4.1 The "initial" insertion mode
 	# http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
+	is_quirks_yes_doctype = (t) ->
+		if t.flag 'force-quirks'
+			return true
+		if t.name isnt 'html'
+			return true
+		if t.public_identifier?
+			pi = t.public_identifier.toLowerCase()
+			for p in quirks_yes_pi_prefixes
+				if pi.substr(0, p.length) is p
+					return true
+			if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
+				return true
+		if t.system_identifier?
+			if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
+				return true
+		else if t.public_identifier?
+			# already did this: pi = t.public_identifier.toLowerCase()
+			if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
+				return true
+		return false
+	is_quirks_limited_doctype = (t) ->
+		if t.public_identifier?
+			pi = t.public_identifier.toLowerCase()
+			if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
+				return true
+			if t.system_identifier?
+				if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
+					return true
+		return false
 	ins_mode_initial = (t) ->
 		if is_space_tok t
 			return
@@ -1377,13 +1530,20 @@ parse_html = (args) ->
 			doc.children.push t
 			return
 		if t.type is TYPE_DOCTYPE
-			# FIXME check identifiers, set quirks, etc
-			# fixfull
+			# fixfull syntax error from first paragraph and following bullets
+			# fixfull set doc.doctype
+			# fixfull is the "not an iframe srcdoc" thing relevant?
+			if is_quirks_yes_doctype t
+				doc.flag 'quirks mode', QUIRKS_YES
+			else if is_quirks_limited_doctype t
+				doc.flag 'quirks mode', QUIRKS_LIMITED
 			doc.children.push t
 			ins_mode = ins_mode_before_html
 			return
 		# Anything else
-		#fixfull (iframe, quirks)
+		# fixfull not iframe srcdoc?
+		parse_error()
+		doc.flag 'quirks mode', QUIRKS_YES
 		ins_mode = ins_mode_before_html
 		process_token t
 		return
@@ -1412,9 +1572,9 @@ parse_html = (args) ->
 				parse_error()
 				return
 		# Anything else
-		html_tok = new_open_tag 'html'
-		el = token_to_element html_tok, NS_HTML, doc
+		el = token_to_element new_open_tag('html'), NS_HTML, doc
 		doc.children.push el
+		el.parent = doc
 		open_els.unshift el
 		# ?fixfull browsing context
 		ins_mode = ins_mode_before_head
@@ -1446,8 +1606,7 @@ parse_html = (args) ->
 				parse_error()
 				return
 		# Anything else
-		head_tok = new_open_tag 'head'
-		el = insert_html_element head_tok
+		el = insert_html_element new_open_tag 'head'
 		head_element_pointer = el
 		ins_mode = ins_mode_in_head
 		process_token t
@@ -1601,7 +1760,7 @@ parse_html = (args) ->
 			parse_error()
 			open_els.unshift head_element_pointer
 			ins_mode_in_head t
-			for el, i of open_els
+			for el, i in open_els
 				if el is head_element_pointer
 					open_els.splice i, 1
 					return
@@ -1621,17 +1780,23 @@ parse_html = (args) ->
 
 	# 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
 	in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
-		for el, i in open_els
-			if el.name is name and el.namespace is NS_HTML
+		node = open_els[0]
+		loop
+			if node.name is name and node.namespace is NS_HTML
 				generate_implied_end_tags name # arg is exception
-				parse_error() unless i is 0
-				while i >= 0
-					open_els.shift()
-					i -= 1
-				return
-			if special_elements[el.name] is el.namespace
+				unless node is open_els[0]
+					parse_error()
+				loop
+					el = open_els.shift()
+					if el is node
+						return
+			if special_elements[node.name] is node.namespace
 				parse_error()
 				return
+			for el, i in open_els
+				if node is el
+					node = open_els[i + 1]
+					break
 		return
 	ins_mode_in_body = (t) ->
 		if t.type is TYPE_TEXT and t.text is "\u0000"
@@ -1656,7 +1821,7 @@ parse_html = (args) ->
 			parse_error()
 			return if template_tag_is_open()
 			root_attrs = open_els[open_els.length - 1].attrs
-			for a of t.attrs_a
+			for a in t.attrs_a
 				root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
 			return
 
@@ -1671,7 +1836,7 @@ parse_html = (args) ->
 			return unless second.name is 'body'
 			return if template_tag_is_open()
 			flag_frameset_ok = false
-			for a of t.attrs_a
+			for a in t.attrs_a
 				second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
 			return
 		if t.type is TYPE_START_TAG and t.name is 'frameset'
@@ -1958,6 +2123,10 @@ parse_html = (args) ->
 			return
 		if t.type is TYPE_START_TAG and t.name is 'nobr'
 			reconstruct_afe()
+			if is_in_scope 'nobr', NS_HTML
+				parse_error()
+				adoption_agency 'nobr'
+				reconstruct_afe()
 			el = insert_html_element t
 			afe_push el
 			return
@@ -1984,14 +2153,15 @@ parse_html = (args) ->
 			clear_afe_to_marker()
 			return
 		if t.type is TYPE_START_TAG and t.name is 'table'
-			close_p_if_in_button_scope() # fixfull quirksmode thing
+			unless doc.flag('quirks mode') is QUIRKS_YES
+				close_p_if_in_button_scope() # test
 			insert_html_element t
 			flag_frameset_ok = false
 			ins_mode = ins_mode_in_table
 			return
 		if t.type is TYPE_END_TAG and t.name is 'br'
 			parse_error()
-			t.type is TYPE_START_TAG
+			t.type = TYPE_START_TAG
 			# fall through
 		if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
 			reconstruct_afe()
@@ -2008,7 +2178,8 @@ parse_html = (args) ->
 			unless is_input_hidden_tok t
 				flag_frameset_ok = false
 			return
-		if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
+		if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
+			# WHATWG adds 'menuitem' for this block
 			insert_html_element t
 			open_els.shift()
 			t.acknowledge_self_closing()
@@ -2118,7 +2289,7 @@ parse_html = (args) ->
 #					parse_error()
 #			insert_html_element t
 #			return
-# below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
+# below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
 		if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
 			if is_in_scope 'ruby', NS_HTML
 				generate_implied_end_tags()
@@ -2133,7 +2304,7 @@ parse_html = (args) ->
 					parse_error()
 			insert_html_element t
 			return
-# end WATWG chunk
+# end WHATWG chunk
 		if t.type is TYPE_START_TAG and t.name is 'math'
 			reconstruct_afe()
 			adjust_mathml_attributes t
@@ -2564,7 +2735,7 @@ parse_html = (args) ->
 			insert_html_element t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'optgroup'
-			if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
+			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
 				if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
 					open_els.shift()
 			if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
@@ -2706,7 +2877,8 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_COMMENT
-			insert_comment t, [open_els[0], open_els[0].children.length]
+			first = open_els[open_els.length - 1]
+			insert_comment t, [first, first.children.length]
 			return
 		if t.type is TYPE_DOCTYPE
 			parse_error()
@@ -2715,7 +2887,9 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'html'
-			# fixfull fragment case
+			if flag_fragment_parsing
+				parse_error()
+				return
 			ins_mode = ins_mode_after_after_body
 			return
 		if t.type is TYPE_EOF
@@ -2783,7 +2957,7 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'html'
-			insert_mode = ins_mode_after_after_frameset
+			ins_mode = ins_mode_after_after_frameset
 			return
 		if t.type is TYPE_START_TAG and t.name is 'noframes'
 			ins_mode_in_head t
@@ -2809,6 +2983,7 @@ parse_html = (args) ->
 		# Anything else
 		parse_error()
 		ins_mode = ins_mode_in_body
+		process_token t
 		return
 
 	# 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
@@ -2853,6 +3028,7 @@ parse_html = (args) ->
 			if t.name is 'script'
 				t.acknowledge_self_closing()
 				in_foreign_content_end_script()
+				# fixfull
 			else
 				open_els.shift()
 				t.acknowledge_self_closing()
@@ -2882,8 +3058,7 @@ parse_html = (args) ->
 				return
 			loop # is this safe?
 				open_els.shift()
-				cn = open_els[0]
-				if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
+				if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
 					break
 			process_token t
 			return
@@ -2894,9 +3069,11 @@ parse_html = (args) ->
 			in_foreign_content_end_script()
 			return
 		if t.type is TYPE_END_TAG
-			if open_els[0].name.toLowerCase() isnt t.name
+			i = 0
+			node = open_els[i]
+			if node.name.toLowerCase() isnt t.name
 				parse_error()
-			for node in open_els
+			loop
 				if node is open_els[open_els.length - 1]
 					return
 				if node.name.toLowerCase() is t.name
@@ -2904,6 +3081,8 @@ parse_html = (args) ->
 						el = open_els.shift()
 						if el is node
 							return
+				i += 1
+				node = open_els[i]
 				if node.namespace is NS_HTML
 					break
 			ins_mode t # explicitly call HTML insertion mode
@@ -2992,50 +3171,55 @@ parse_html = (args) ->
 
 	# 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
 	tok_state_tag_open = ->
-		switch c = txt.charAt(cur++)
-			when '!'
-				tok_state = tok_state_markup_declaration_open
-			when '/'
-				tok_state = tok_state_end_tag_open
-			when '?'
-				parse_error()
-				tok_cur_tag = new_comment_token '?'
-				tok_state = tok_state_bogus_comment
-			else
-				if is_lc_alpha(c)
-					tok_cur_tag = new_open_tag c
-					tok_state = tok_state_tag_name
-				else if is_uc_alpha(c)
-					tok_cur_tag = new_open_tag c.toLowerCase()
-					tok_state = tok_state_tag_name
-				else
-					parse_error()
-					tok_state = tok_state_data
-					cur -= 1 # we didn't parse/handle the char after <
-					return new_text_node '<'
-		return null
+		c = txt.charAt(cur++)
+		if c is '!'
+			tok_state = tok_state_markup_declaration_open
+			return
+		if c is '/'
+			tok_state = tok_state_end_tag_open
+			return
+		if is_uc_alpha(c)
+			tok_cur_tag = new_open_tag c.toLowerCase()
+			tok_state = tok_state_tag_name
+			return
+		if is_lc_alpha(c)
+			tok_cur_tag = new_open_tag c
+			tok_state = tok_state_tag_name
+			return
+		if c is '?'
+			parse_error()
+			tok_cur_tag = new_comment_token '?' # FIXME right?
+			tok_state = tok_state_bogus_comment
+			return
+		# Anything else
+		parse_error()
+		tok_state = tok_state_data
+		cur -= 1 # we didn't parse/handle the char after <
+		return new_text_node '<'
 
 	# 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
 	tok_state_end_tag_open = ->
-		switch c = txt.charAt(cur++)
-			when '>'
-				parse_error()
-				tok_state = tok_state_data
-			when '' # EOF
-				parse_error()
-				tok_state = tok_state_data
-				return new_text_node '</'
-			else
-				if is_uc_alpha(c)
-					tok_cur_tag = new_end_tag c.toLowerCase()
-					tok_state = tok_state_tag_name
-				else if is_lc_alpha(c)
-					tok_cur_tag = new_end_tag c
-					tok_state = tok_state_tag_name
-				else
-					parse_error()
-					tok_cur_tag = new_comment_token '/'
-					tok_state = tok_state_bogus_comment
+		c = txt.charAt(cur++)
+		if is_uc_alpha(c)
+			tok_cur_tag = new_end_tag c.toLowerCase()
+			tok_state = tok_state_tag_name
+			return
+		if is_lc_alpha(c)
+			tok_cur_tag = new_end_tag c
+			tok_state = tok_state_tag_name
+			return
+		if c is '>'
+			parse_error()
+			tok_state = tok_state_data
+			return
+		if c is '' # EOF
+			parse_error()
+			tok_state = tok_state_data
+			return new_text_node '</'
+		# Anything else
+		parse_error()
+		tok_cur_tag = new_comment_token c
+		tok_state = tok_state_bogus_comment
 		return null
 
 	# 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
@@ -3365,7 +3549,7 @@ parse_html = (args) ->
 		# Anything else
 		tok_state = tok_state_script_data_escaped
 		cur -= 1 # Reconsume
-		return new_character_token c
+		return new_character_token '<'
 
 	# 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
 	tok_state_script_data_escaped_end_tag_open = ->
@@ -3743,7 +3927,7 @@ parse_html = (args) ->
 	tok_state_self_closing_start_tag = ->
 		c = txt.charAt(cur++)
 		if c is '>'
-			tok_cur_tag.flag 'self-closing'
+			tok_cur_tag.flag 'self-closing', true
 			tok_state = tok_state_data
 			return tok_cur_tag
 		if c is ''
@@ -4363,7 +4547,9 @@ parse_html = (args) ->
 		else
 			val = txt.substr cur, (next_gt - cur)
 			cur = next_gt + 3
-		return new_character_token val # fixfull split
+		if val.length > 0
+			return new_character_token val # fixfull split
+		return null
 
 	# 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
 	# Don't set this as a state, just call it
@@ -4459,6 +4645,7 @@ parse_html = (args) ->
 	txt = args.html
 	cur = 0
 	doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+	doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
 	open_els = []
 	afe = [] # active formatting elements
 	template_ins_modes = []
@@ -4474,6 +4661,7 @@ parse_html = (args) ->
 	head_element_pointer = null
 	flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
 	context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+	prev_node_id = 0 # just for debugging
 
 	# tokenizer initialization
 	tok_state = tok_state_data
@@ -4484,7 +4672,7 @@ parse_html = (args) ->
 	txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
 	txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-	if args.name is "plain-text-unsafe.dat #4"
+	if args.name is "tests23.dat #1"
 		console.log "hi"
 	# proccess input
 	# http://www.w3.org/TR/html5/syntax.html#tree-construction
@@ -4514,3 +4702,6 @@ module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
 module.exports.NS_HTML = NS_HTML
 module.exports.NS_MATHML = NS_MATHML
 module.exports.NS_SVG = NS_SVG
+module.exports.QUIRKS_NO = QUIRKS_NO
+module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED
+module.exports.QUIRKS_YES = QUIRKS_YES