X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;ds=sidebyside;f=parse-html.coffee;h=cea8fa1d7eb9ce2efee1af721eb1d5e4a48d1d69;hb=5056e7c63784b7c388e514feebdaf3bc296826c4;hp=31a46f413fc2447fd4e1d21b03531d9f5ae71c66;hpb=12e07fdf217eda724e703e32ec5c8b968bb3a727;p=peach-html5-editor.git

diff --git a/parse-html.coffee b/parse-html.coffee
index 31a46f4..cea8fa1 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -512,8 +512,9 @@ decode_named_char_ref = (txt) ->
 	return null if decoded is txt
 	return g_dncr.cache[txt] = decoded
 
-parse_html = (txt, parse_error_cb = null) ->
-	cur = 0 # index of next char in txt to be parsed
+parse_html = (args) ->
+	txt = null
+	cur = null # index of next char in txt to be parsed
 	# declare doc and tokenizer variables so they're in scope below
 	doc = null
 	open_els = null # stack of open elements
@@ -538,8 +539,8 @@ parse_html = (txt, parse_error_cb = null) ->
 		flag_parsing = false
 
 	parse_error = ->
-		if parse_error_cb?
-			parse_error_cb cur
+		if args.error_cb?
+			args.error_cb cur
 		else
 			console.log "Parse error at character #{cur} of #{txt.length}"
 
@@ -1191,7 +1192,7 @@ parse_html = (txt, parse_error_cb = null) ->
 				last_template = null
 				last_template_i = null
 				for el, i in open_els
-					if el.name is 'template'
+					if el.name is 'template' and el.namespace is NS_HTML
 						last_template = el
 						last_template_i = i
 						break
@@ -1200,7 +1201,7 @@ parse_html = (txt, parse_error_cb = null) ->
 				last_table = null
 				last_table_i
 				for el, i in open_els
-					if el.name is 'table'
+					if el.name is 'table' and el.namespace is NS_HTML
 						last_table = el
 						last_table_i = i
 						break
@@ -1222,6 +1223,7 @@ parse_html = (txt, parse_error_cb = null) ->
 					# this is odd
 					target = open_els[open_els.length - 1]
 					target_i = target.children.length
+					break
 				# 5. If last table has a parent element, then let adjusted
 				# insertion location be inside last table's parent element,
 				# immediately before last table, and abort these substeps.
@@ -2100,19 +2102,6 @@ parse_html = (txt, parse_error_cb = null) ->
 			return
 		return
 
-	ins_mode_in_table_else = (t) ->
-		parse_error()
-		flag_foster_parenting = true # FIXME
-		ins_mode_in_body t
-		flag_foster_parenting = false
-	can_in_table = { # FIXME do this inline like everywhere else
-		'table': true
-		'tbody': true
-		'tfoot': true
-		'thead': true
-		'tr': true
-	}
-
 	# 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
 	ins_mode_text = (t) ->
 		if t.type is TYPE_TEXT
@@ -2142,6 +2131,19 @@ parse_html = (txt, parse_error_cb = null) ->
 	# http://www.w3.org/TR/html5/syntax.html#tokenization
 
 	# 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
+	ins_mode_in_table_else = (t) ->
+		parse_error()
+		flag_foster_parenting = true
+		ins_mode_in_body t
+		flag_foster_parenting = false
+		return
+	can_in_table = { # FIXME do this inline like everywhere else
+		'table': true
+		'tbody': true
+		'tfoot': true
+		'thead': true
+		'tr': true
+	}
 	ins_mode_in_table = (t) ->
 		switch t.type
 			when TYPE_TEXT
@@ -2192,7 +2194,7 @@ parse_html = (txt, parse_error_cb = null) ->
 					when 'style', 'script', 'template'
 						ins_mode_in_head t
 					when 'input'
-						if is_input_hidden_tok t
+						unless is_input_hidden_tok t
 							ins_mode_in_table_else t
 						else
 							parse_error()
@@ -3040,9 +3042,9 @@ parse_html = (txt, parse_error_cb = null) ->
 	is_appropriate_end_tag = (t) ->
 		# spec says to check against "the tag name of the last start tag to
 		# have been emitted from this tokenizer", but this is only called from
-		# the various "raw" states, which I'm pretty sure all push the start
-		# token onto open_els. TODO: verify this after the script data states
-		# are implemented
+		# the various "raw" states, so it's hopefully ok to assume that
+		# open_els[0].name will work instead. TODO: verify this after the
+		# script data states are implemented.
 		debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
 		return t.type is TYPE_END_TAG and t.name is open_els[0].name
 
@@ -3184,6 +3186,11 @@ parse_html = (txt, parse_error_cb = null) ->
 				tok_state = tok_state_self_closing_start_tag
 				return
 			# fall through
+		if c is '>'
+			if is_appropriate_end_tag tok_cur_tag
+				tok_state = tok_state_data
+				return tok_cur_tag
+			# fall through
 		if is_uc_alpha(c)
 			tok_cur_tag.name += c.toLowerCase()
 			temporary_buffer += c
@@ -3516,16 +3523,16 @@ parse_html = (txt, parse_error_cb = null) ->
 				return tmp
 			when "\u0000"
 				parse_error()
-				tok_cur_tag.attrs_a[0][0] = "\ufffd"
+				tok_cur_tag.attrs_a[0][0] += "\ufffd"
 			when '"', "'", '<'
 				parse_error()
-				tok_cur_tag.attrs_a[0][0] = c
+				tok_cur_tag.attrs_a[0][0] += c
 			when '' # EOF
 				parse_error()
 				tok_state = tok_state_data
 			else
 				if is_uc_alpha(c)
-					tok_cur_tag.attrs_a[0][0] = c.toLowerCase()
+					tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
 				else
 					tok_cur_tag.attrs_a[0][0] += c
 		return null
@@ -4294,7 +4301,9 @@ parse_html = (txt, parse_error_cb = null) ->
 		else
 			val = txt.substr cur, (next_gt - cur)
 			cur = next_gt + 3
-		val = val.replace "\u0000", "\ufffd" # fixfull spec doesn't say this
+		val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
+		val = val.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
+		val = val.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 		return new_character_token val # fixfull split
 
 	# 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
@@ -4375,13 +4384,15 @@ parse_html = (txt, parse_error_cb = null) ->
 
 	# tree constructor initialization
 	# see comments on TYPE_TAG/etc for the structure of this data
+	txt = args.html
+	cur = 0
 	doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
 	open_els = []
 	afe = [] # active formatting elements
 	template_ins_modes = []
 	ins_mode = ins_mode_initial
 	original_ins_mode = ins_mode # TODO check spec
-	flag_scripting = true # TODO might need an extra flag to get <noscript> to parse correctly
+	flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
 	flag_frameset_ok = true
 	flag_parsing = true
 	flag_foster_parenting = false