X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;f=parse-html.coffee;h=b25359dd7ac92e3b89af4e2727581c618f0a218d;hb=5addabc209b9abfce9b8d2f88cfba910c54f5ac6;hp=73eda84ffabceecd283782f1e4d7a90a290d10b9;hpb=af702500dd54507b24184075a7fb7c1f5acf70e5;p=peach-html5-editor.git

diff --git a/parse-html.coffee b/parse-html.coffee
index 73eda84..b25359d 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -112,7 +112,7 @@ class Node
 			@id = "#{++prev_node_id}"
 	acknowledge_self_closing: ->
 		if @token?
-			@token.flag 'did_self_close'
+			@token.flag 'did_self_close', true
 		else
 			@flag 'did_self_close', true
 	flag: (key, value = null) ->
@@ -340,14 +340,17 @@ special_elements = {
 	h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 	header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 	img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
-	listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
-	noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
-	ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
-	script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
-	style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
-	template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
-	thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
-	wbr:NS_HTML, xmp:NS_HTML,
+	listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
+
+	menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
+
+	meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
+	noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
+	plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
+	select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
+	table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
+	textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
+	tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
 
 	# MathML:
 	mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
@@ -465,7 +468,7 @@ svg_attribute_fixes = {
 	diffuseconstant: 'diffuseConstant'
 	edgemode: 'edgeMode'
 	externalresourcesrequired: 'externalResourcesRequired'
-	filterres: 'filterRes'
+	# WHATWG removes this: filterres: 'filterRes'
 	filterunits: 'filterUnits'
 	glyphref: 'glyphRef'
 	gradienttransform: 'gradientTransform'
@@ -517,6 +520,20 @@ svg_attribute_fixes = {
 	ychannelselector: 'yChannelSelector'
 	zoomandpan: 'zoomAndPan'
 }
+foreign_attr_fixes = { # maps a "prefix:local" attribute name to the space-separated name that adjust_foreign_attributes rewrites it to ('xmlns' maps to itself)
+	'xlink:actuate': 'xlink actuate'
+	'xlink:arcrole': 'xlink arcrole'
+	'xlink:href': 'xlink href'
+	'xlink:role': 'xlink role'
+	'xlink:show': 'xlink show'
+	'xlink:title': 'xlink title'
+	'xlink:type': 'xlink type'
+	'xml:base': 'xml base'
+	'xml:lang': 'xml lang'
+	'xml:space': 'xml space'
+	'xmlns': 'xmlns'
+	'xmlns:xlink': 'xmlns xlink'
+}
 adjust_mathml_attributes = (t) ->
 	for a in t.attrs_a
 		if a[0] is 'definitionurl'
@@ -529,6 +546,9 @@ adjust_svg_attributes = (t) ->
 	return
 adjust_foreign_attributes = (t) ->
 	# fixfull
+	for a in t.attrs_a
+		if foreign_attr_fixes[a[0]]?
+			a[0] = foreign_attr_fixes[a[0]]
 	return
 
 # decode_named_char_ref()
@@ -629,10 +649,10 @@ parse_html = (args) ->
 	standard_scopers = {
 		applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
 		td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
-		template: NS_HTML, mi: NS_MATHML,
+		template: NS_HTML,
 
-		mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
-		'annotation-xml': NS_MATHML,
+		mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
+		mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
 
 		foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 	}
@@ -1193,7 +1213,7 @@ parse_html = (args) ->
 			ins_mode t
 			return
 		if is_mathml_text_integration_point(acn)
-			if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
+			if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
 				ins_mode t
 				return
 			if t.type is TYPE_TEXT
@@ -1656,7 +1676,7 @@ parse_html = (args) ->
 			parse_error()
 			return if template_tag_is_open()
 			root_attrs = open_els[open_els.length - 1].attrs
-			for a of t.attrs_a
+			for a in t.attrs_a
 				root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
 			return
 
@@ -2202,7 +2222,8 @@ parse_html = (args) ->
 	ins_mode_in_table = (t) ->
 		switch t.type
 			when TYPE_TEXT
-				if t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr'
+				if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
+					pending_table_character_tokens = []
 					original_ins_mode = ins_mode
 					ins_mode = ins_mode_in_table_text
 					process_token t
@@ -2292,7 +2313,7 @@ parse_html = (args) ->
 	# 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
 	ins_mode_in_table_text = (t) ->
 		if t.type is TYPE_TEXT and t.text is "\u0000"
-			# huh? I thought the tokenizer didn't emit these
+			# U+0000 text token reached tree construction; possibly from javascript?
 			parse_error()
 			return
 		if t.type is TYPE_TEXT
@@ -2309,8 +2330,8 @@ parse_html = (args) ->
 				insert_character old
 		else
 			for old in pending_table_character_tokens
-				ins_mode_table_else old
-		pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
+				ins_mode_in_table_else old
+		pending_table_character_tokens = []
 		ins_mode = original_ins_mode
 		process_token t
 
@@ -2705,7 +2726,8 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_COMMENT
-			insert_comment t, [open_els[0], open_els[0].children.length]
+			first = open_els[open_els.length - 1]
+			insert_comment t, [first, first.children.length]
 			return
 		if t.type is TYPE_DOCTYPE
 			parse_error()
@@ -2714,7 +2736,9 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'html'
-			# fixfull fragment case
+			if flag_fragment_parsing
+				parse_error()
+				return
 			ins_mode = ins_mode_after_after_body
 			return
 		if t.type is TYPE_EOF
@@ -2782,7 +2806,7 @@ parse_html = (args) ->
 			ins_mode_in_body t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'html'
-			insert_mode = ins_mode_after_after_frameset
+			ins_mode = ins_mode_after_after_frameset
 			return
 		if t.type is TYPE_START_TAG and t.name is 'noframes'
 			ins_mode_in_head t
@@ -2808,6 +2832,7 @@ parse_html = (args) ->
 		# Anything else
 		parse_error()
 		ins_mode = ins_mode_in_body
+		process_token t
 		return
 
 	# 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
@@ -2852,6 +2877,7 @@ parse_html = (args) ->
 			if t.name is 'script'
 				t.acknowledge_self_closing()
 				in_foreign_content_end_script()
+				# fixfull
 			else
 				open_els.shift()
 				t.acknowledge_self_closing()
@@ -2881,8 +2907,7 @@ parse_html = (args) ->
 				return
 			loop # is this safe?
 				open_els.shift()
-				cn = open_els[0]
-				if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
+				if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
 					break
 			process_token t
 			return
@@ -2893,9 +2918,11 @@ parse_html = (args) ->
 			in_foreign_content_end_script()
 			return
 		if t.type is TYPE_END_TAG
-			if open_els[0].name.toLowerCase() isnt t.name
+			i = 0
+			node = open_els[i]
+			if node.name.toLowerCase() isnt t.name
 				parse_error()
-			for node in open_els
+			loop
 				if node is open_els[open_els.length - 1]
 					return
 				if node.name.toLowerCase() is t.name
@@ -2903,6 +2930,8 @@ parse_html = (args) ->
 						el = open_els.shift()
 						if el is node
 							return
+				i += 1
+				node = open_els[i]
 				if node.namespace is NS_HTML
 					break
 			ins_mode t # explicitly call HTML insertion mode
@@ -2991,28 +3020,31 @@ parse_html = (args) ->
 
 	# 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
 	tok_state_tag_open = ->
-		switch c = txt.charAt(cur++)
-			when '!'
-				tok_state = tok_state_markup_declaration_open
-			when '/'
-				tok_state = tok_state_end_tag_open
-			when '?'
-				parse_error()
-				tok_cur_tag = new_comment_token '?'
-				tok_state = tok_state_bogus_comment
-			else
-				if is_lc_alpha(c)
-					tok_cur_tag = new_open_tag c
-					tok_state = tok_state_tag_name
-				else if is_uc_alpha(c)
-					tok_cur_tag = new_open_tag c.toLowerCase()
-					tok_state = tok_state_tag_name
-				else
-					parse_error()
-					tok_state = tok_state_data
-					cur -= 1 # we didn't parse/handle the char after <
-					return new_text_node '<'
-		return null
+		c = txt.charAt(cur++)
+		if c is '!'
+			tok_state = tok_state_markup_declaration_open
+			return
+		if c is '/'
+			tok_state = tok_state_end_tag_open
+			return
+		if is_uc_alpha(c)
+			tok_cur_tag = new_open_tag c.toLowerCase()
+			tok_state = tok_state_tag_name
+			return
+		if is_lc_alpha(c)
+			tok_cur_tag = new_open_tag c
+			tok_state = tok_state_tag_name
+			return
+		if c is '?'
+			parse_error()
+			tok_cur_tag = new_comment_token '?' # FIXME confirm the bogus comment's data should begin with the '?'
+			tok_state = tok_state_bogus_comment
+			return
+		# Anything else
+		parse_error()
+		tok_state = tok_state_data
+		cur -= 1 # we didn't parse/handle the char after <
+		return new_text_node '<'
 
 	# 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
 	tok_state_end_tag_open = ->
@@ -3364,7 +3396,7 @@ parse_html = (args) ->
 		# Anything else
 		tok_state = tok_state_script_data_escaped
 		cur -= 1 # Reconsume
-		return new_character_token c
+		return new_character_token '<'
 
 	# 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
 	tok_state_script_data_escaped_end_tag_open = ->
@@ -3742,7 +3774,7 @@ parse_html = (args) ->
 	tok_state_self_closing_start_tag = ->
 		c = txt.charAt(cur++)
 		if c is '>'
-			tok_cur_tag.flag 'self-closing'
+			tok_cur_tag.flag 'self-closing', true
 			tok_state = tok_state_data
 			return tok_cur_tag
 		if c is ''
@@ -4483,7 +4515,7 @@ parse_html = (args) ->
 	txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
 	txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-	if args.name is "plain-text-unsafe.dat #4"
+	if args.name is "tests18.dat #17"
 		console.log "hi"
 	# proccess input
 	# http://www.w3.org/TR/html5/syntax.html#tree-construction