From: Jason Woofenden
Date: Thu, 24 Dec 2015 16:04:02 +0000 (-0500)
Subject: finish implementing quirks mode checks
X-Git-Url: https://jasonwoof.com/gitweb/?p=peach-html5-editor.git;a=commitdiff_plain;h=9d56a837ea14fd1324617b1a85f8a1d52db76319

finish implementing quirks mode checks
---

diff --git a/parse-html.coffee b/parse-html.coffee
index 3c861ba..3d4db84 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -84,6 +84,11 @@ NS_HTML = 1
 NS_MATHML = 2
 NS_SVG = 3
 
+# quirks mode constants
+QUIRKS_NO = 1
+QUIRKS_LIMITED = 2
+QUIRKS_YES = 3
+
 g_debug_log = []
 debug_log_reset = ->
 	g_debug_log = []
@@ -249,6 +254,64 @@ unicode_fixes[0x9C] = "\u0153"
 unicode_fixes[0x9E] = "\u017E"
 unicode_fixes[0x9F] = "\u0178"
 
+quirks_yes_pi_prefixes = [
+	"+//silmaril//dtd html pro v0r11 19970101//"
+	"-//as//dtd html 3.0 aswedit + extensions//"
+	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
+	"-//ietf//dtd html 2.0 level 1//"
+	"-//ietf//dtd html 2.0 level 2//"
+	"-//ietf//dtd html 2.0 strict level 1//"
+	"-//ietf//dtd html 2.0 strict level 2//"
+	"-//ietf//dtd html 2.0 strict//"
+	"-//ietf//dtd html 2.0//"
+	"-//ietf//dtd html 2.1e//"
+	"-//ietf//dtd html 3.0//"
+	"-//ietf//dtd html 3.2 final//"
+	"-//ietf//dtd html 3.2//"
+	"-//ietf//dtd html 3//"
+	"-//ietf//dtd html level 0//"
+	"-//ietf//dtd html level 1//"
+	"-//ietf//dtd html level 2//"
+	"-//ietf//dtd html level 3//"
+	"-//ietf//dtd html strict level 0//"
+	"-//ietf//dtd html strict level 1//"
+	"-//ietf//dtd html strict level 2//"
+	"-//ietf//dtd html strict level 3//"
+	"-//ietf//dtd html strict//"
+	"-//ietf//dtd html//"
+	"-//metrius//dtd metrius presentational//"
+	"-//microsoft//dtd internet explorer 2.0 html strict//"
+	"-//microsoft//dtd internet explorer 2.0 html//"
+	"-//microsoft//dtd internet explorer 2.0 tables//"
+	"-//microsoft//dtd internet explorer 3.0 html strict//"
+	"-//microsoft//dtd internet explorer 3.0 html//"
+	"-//microsoft//dtd internet explorer 3.0 tables//"
+	"-//netscape comm. corp.//dtd html//"
+	"-//netscape comm. corp.//dtd strict html//"
+	"-//o'reilly and associates//dtd html 2.0//"
+	"-//o'reilly and associates//dtd html extended 1.0//"
+	"-//o'reilly and associates//dtd html extended relaxed 1.0//"
+	"-//sq//dtd html 2.0 hotmetal + extensions//"
+	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
+	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
+	"-//spyglass//dtd html 2.0 extended//"
+	"-//sun microsystems corp.//dtd hotjava html//"
+	"-//sun microsystems corp.//dtd hotjava strict html//"
+	"-//w3c//dtd html 3 1995-03-24//"
+	"-//w3c//dtd html 3.2 draft//"
+	"-//w3c//dtd html 3.2 final//"
+	"-//w3c//dtd html 3.2//"
+	"-//w3c//dtd html 3.2s draft//"
+	"-//w3c//dtd html 4.0 frameset//"
+	"-//w3c//dtd html 4.0 transitional//"
+	"-//w3c//dtd html experimental 19960712//"
+	"-//w3c//dtd html experimental 970421//"
+	"-//w3c//dtd w3 html//"
+	"-//w3o//dtd w3 html 3.0//"
+	"-//webtechs//dtd mozilla html 2.0//"
+	"-//webtechs//dtd mozilla html//"
+]
+
 # These are the character references that don't need a terminating semicolon
 # min length: 2, max: 6, none are a prefix of any other.
 legacy_char_refs = {
@@ -626,24 +689,24 @@ parse_html = (args) ->
 
 	# But first... the helpers
 	template_tag_is_open = ->
-		for t in open_els
-			if t.name is 'template' and t.namespace is NS_HTML
+		for el in open_els
+			if el.name is 'template' and el.namespace is NS_HTML
 				return true
 		return false
 	is_in_scope_x = (tag_name, scope, namespace) ->
-		for t in open_els
-			if t.name is tag_name and (namespace is null or namespace is t.namespace)
+		for el in open_els
+			if el.name is tag_name and (namespace is null or namespace is el.namespace)
 				return true
-			if scope[t.name] is t.namespace
+			if scope[el.name] is el.namespace
 				return false
 		return false
 	is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
-		for t in open_els
-			if t.name is tag_name and (namespace is null or namespace is t.namespace)
+		for el in open_els
+			if el.name is tag_name and (namespace is null or namespace is el.namespace)
 				return true
-			if scope[t.name] is t.namespace
+			if scope[el.name] is el.namespace
 				return false
-			if scope2[t.name] is t.namespace
+			if scope2[el.name] is el.namespace
 				return false
 		return false
 	standard_scopers = {
@@ -1419,6 +1482,35 @@ parse_html = (args) ->
 
 	# 8.2.5.4.1 The "initial" insertion mode
 	# http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
+	is_quirks_yes_doctype = (t) -> # true when DOCTYPE token t selects full quirks mode
+		if t.flag 'force-quirks'
+			return true
+		if t.name isnt 'html'
+			return true
+		if t.public_identifier?
+			pi = t.public_identifier.toLowerCase()
+			for p in quirks_yes_pi_prefixes
+				if pi.substr(0, p.length) is p
+					return true
+			if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
+				return true
+		if t.system_identifier?
+			if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
+				return true
+		else if t.public_identifier?
+			# already did this: pi = t.public_identifier.toLowerCase()
+			if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
+				return true
+		return false
+	is_quirks_limited_doctype = (t) -> # true when DOCTYPE token t selects limited-quirks mode
+		if t.public_identifier?
+			pi = t.public_identifier.toLowerCase()
+			if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
+				return true
+			if t.system_identifier? # nested on purpose: pi is only assigned when a public identifier exists
+				if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
+					return true
+		return false
 	ins_mode_initial = (t) ->
 		if is_space_tok t
 			return
@@ -1427,13 +1519,20 @@ parse_html = (args) ->
 			doc.children.push t
 			return
 		if t.type is TYPE_DOCTYPE
-			# FIXME check identifiers, set quirks, etc
-			# fixfull
+			# fixfull syntax error from first paragraph and following bullets
+			# fixfull set doc.doctype
+			# fixfull is the "not an iframe srcdoc" thing relevant?
+			if is_quirks_yes_doctype t
+				doc.flag 'quirks mode', QUIRKS_YES
+			else if is_quirks_limited_doctype t
+				doc.flag 'quirks mode', QUIRKS_LIMITED
 			doc.children.push t
 			ins_mode = ins_mode_before_html
 			return
 		# Anything else
-		#fixfull (iframe, quirks)
+		# fixfull not iframe srcdoc?
+		parse_error()
+		doc.flag 'quirks mode', QUIRKS_YES
 		ins_mode = ins_mode_before_html
 		process_token t
 		return
@@ -1462,9 +1561,9 @@ parse_html = (args) ->
 			parse_error()
 			return
 		# Anything else
-		html_tok = new_open_tag 'html'
-		el = token_to_element html_tok, NS_HTML, doc
+		el = token_to_element new_open_tag('html'), NS_HTML, doc
 		doc.children.push el
+		el.parent = doc
 		open_els.unshift el
 		# ?fixfull browsing context
 		ins_mode = ins_mode_before_head
@@ -1496,8 +1595,7 @@ parse_html = (args) ->
 			parse_error()
 			return
 		# Anything else
-		head_tok = new_open_tag 'head'
-		el = insert_html_element head_tok
+		el = insert_html_element new_open_tag 'head'
 		head_element_pointer = el
 		ins_mode = ins_mode_in_head
 		process_token t
@@ -2034,14 +2132,15 @@ parse_html = (args) ->
 			clear_afe_to_marker()
 			return
 		if t.type is TYPE_START_TAG and t.name is 'table'
-			close_p_if_in_button_scope() # fixfull quirksmode thing
+			unless doc.flag('quirks mode') is QUIRKS_YES
+				close_p_if_in_button_scope() # test
 			insert_html_element t
 			flag_frameset_ok = false
 			ins_mode = ins_mode_in_table
 			return
 		if t.type is TYPE_END_TAG and t.name is 'br'
 			parse_error()
-			t.type is TYPE_START_TAG
+			t.type = TYPE_START_TAG
 			# fall through
 		if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
 			reconstruct_afe()
@@ -4522,6 +4621,7 @@ parse_html = (args) ->
 	txt = args.html
 	cur = 0
 	doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+	doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
 	open_els = []
 	afe = [] # active formatting elements
 	template_ins_modes = []
@@ -4548,7 +4648,7 @@ parse_html = (args) ->
 	txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
 	txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-	if args.name is "tests18.dat #17"
+	if args.name is "tests20.dat #22"
 		console.log "hi"
 	# proccess input
 	# http://www.w3.org/TR/html5/syntax.html#tree-construction
@@ -4578,3 +4678,6 @@ module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
 module.exports.NS_HTML = NS_HTML
 module.exports.NS_MATHML = NS_MATHML
 module.exports.NS_SVG = NS_SVG
+module.exports.QUIRKS_NO = QUIRKS_NO
+module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED
+module.exports.QUIRKS_YES = QUIRKS_YES