X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;ds=sidebyside;f=parse-html.coffee;h=20bc99c39de4543dc344ad4f79e8ff9e39f2ac85;hb=e7cac479773cf44f9ae28a3f559cef095b19e361;hp=b3b3f7fd9d51dcc65e9bc237b43ec16afb06e736;hpb=fcaaf0f85eb620893b85f5efcb8d894b68793cd9;p=peach-html5-editor.git

diff --git a/parse-html.coffee b/parse-html.coffee
index b3b3f7f..20bc99c 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -1780,17 +1780,23 @@ parse_html = (args) ->
 
 	# 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
 	in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
-		for el, i in open_els
-			if el.name is name and el.namespace is NS_HTML
+		node = open_els[0] # walk open_els starting from index 0
+		loop
+			if node.name is name and node.namespace is NS_HTML
 				generate_implied_end_tags name # arg is exception
-				parse_error() unless i is 0
-				while i >= 0
-					open_els.shift()
-					i -= 1
-				return
-			if special_elements[el.name] is el.namespace
+				unless node is open_els[0] # the matching element is not at the front of open_els
+					parse_error()
+				loop # shift entries off open_els until node itself has been removed
+					el = open_els.shift()
+					if el is node
+						return
+			if special_elements[node.name] is node.namespace # node is listed in special_elements for its namespace
 				parse_error()
 				return
+			for el, i in open_els # advance node to the entry that follows it in open_els
+				if node is el
+					node = open_els[i + 1]
+					break
 		return
 	ins_mode_in_body = (t) ->
 		if t.type is TYPE_TEXT and t.text is "\u0000"
@@ -1918,11 +1924,7 @@ parse_html = (args) ->
 		if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
 			close_p_if_in_button_scope()
 			insert_html_element t
-			# spec: If the next token is a "LF" (U+000A) character token, then
-			# ignore that token and move on to the next one. (Newlines at the
-			# start of pre blocks are ignored as an authoring convenience.)
-			if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
-				cur += 1
+			eat_next_token_if_newline()
 			flag_frameset_ok = false
 			return
 		if t.type is TYPE_START_TAG and t.name is 'form'
@@ -2117,6 +2119,10 @@ parse_html = (args) ->
 			return
 		if t.type is TYPE_START_TAG and t.name is 'nobr'
 			reconstruct_afe()
+			if is_in_scope 'nobr', NS_HTML
+				parse_error()
+				adoption_agency 'nobr'
+				reconstruct_afe()
 			el = insert_html_element t
 			afe_push el
 			return
@@ -2151,7 +2157,8 @@ parse_html = (args) ->
 			return
 		if t.type is TYPE_END_TAG and t.name is 'br'
 			parse_error()
-			t.type = TYPE_START_TAG
+			# W3C: t.type = TYPE_START_TAG
+			t = new_open_tag 'br' # WHATWG
 			# fall through
 		if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
 			reconstruct_afe()
@@ -2168,7 +2175,8 @@ parse_html = (args) ->
 			unless is_input_hidden_tok t
 				flag_frameset_ok = false
 			return
-		if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
+		if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
+			# WHATWG adds 'menuitem' for this block
 			insert_html_element t
 			open_els.shift()
 			t.acknowledge_self_closing()
@@ -2228,8 +2236,7 @@ parse_html = (args) ->
 			return
 		if t.type is TYPE_START_TAG and t.name is 'textarea'
 			insert_html_element t
-			if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
-				cur += 1
+			eat_next_token_if_newline()
 			tok_state = tok_state_rcdata
 			original_ins_mode = ins_mode
 			flag_frameset_ok = false
@@ -2724,7 +2731,7 @@ parse_html = (args) ->
 			insert_html_element t
 			return
 		if t.type is TYPE_END_TAG and t.name is 'optgroup'
-			if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
+			if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
 				if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
 					open_els.shift()
 			if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
@@ -2760,7 +2767,7 @@ parse_html = (args) ->
 			return
 		if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
 			parse_error()
-			if is_in_select_scope 'select', NS_HTML
+			unless is_in_select_scope 'select', NS_HTML
 				return
 			loop
 				el = open_els.shift()
@@ -3086,7 +3093,7 @@ parse_html = (args) ->
 				tok_state = tok_state_tag_open
 			when "\u0000"
 				parse_error()
-				return new_text_node "\ufffd"
+				return new_text_node c
 			when '' # EOF
 				return new_eof_token()
 			else
@@ -3783,7 +3790,7 @@ parse_html = (args) ->
 			return
 		if c is '>'
 			tok_state = tok_state_data
-			return
+			return tok_cur_tag
 		if is_uc_alpha(c)
 			tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
 			tok_state = tok_state_attribute_name
@@ -4536,6 +4543,7 @@ parse_html = (args) ->
 		else
 			val = txt.substr cur, (next_gt - cur)
 			cur = next_gt + 3
+		val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
 		if val.length > 0
 			return new_character_token val # fixfull split
 		return null
@@ -4629,6 +4637,24 @@ parse_html = (args) ->
 					return '&'
 		return # never reached
 
+	eat_next_token_if_newline = -> # tokenize ahead; keep cur advanced only if the next token is a newline text token
+		old_cur = cur # remember the position so it can be rewound below
+		t = null
+		until t?
+			t = tok_state() # keep calling tok_state until it returns a non-null value
+		if t.type is TYPE_TEXT
+			# definition of a newline depends on whether it was a character ref or not
+			if cur - old_cur is 1
+				# exactly one input character was consumed, so not a character reference
+				if t.text is "\u000d" or t.text is "\u000a"
+					return # newline swallowed: cur stays advanced
+			else
+				if t.text is "\u000a"
+					return # newline swallowed: cur stays advanced
+		# not a "newline": put cur back where it was on entry
+		cur = old_cur # NOTE(review): only cur is rewound, tok_state is not restored - confirm that is always safe
+		return
+
 	# tree constructor initialization
 	# see comments on TYPE_TAG/etc for the structure of this data
 	txt = args.html
@@ -4657,19 +4683,20 @@ parse_html = (args) ->
 
 	# text pre-processing
 	# FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
-	txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
 	txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
 	txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-	if args.name is "tests23.dat #1"
+	if args.name is "webkit01.dat #12"
 		console.log "hi"
 	# proccess input
 	# http://www.w3.org/TR/html5/syntax.html#tree-construction
-	while flag_parsing
-		t = tok_state()
-		if t?
-			process_token t
-			# fixfull parse error if has self-closing flag, but it wasn't acknolwedged
+	parse_main_loop = -> # call tok_state repeatedly and pass each token to process_token while flag_parsing is set
+		while flag_parsing
+			t = tok_state()
+			if t? # skip calls where tok_state returned nothing
+				process_token t
+				# fixfull parse error if has self-closing flag, but it wasn't acknowledged
+	parse_main_loop()
 	return doc.children
 
 serialize_els = (els, shallow, show_ids) ->