From 45a9823ee371376bec0c8295996ccf07c7d76580 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Wed, 23 Dec 2015 12:05:17 -0500 Subject: [PATCH] finish ins_mode_in_body and missing tok_state --- parse-html.coffee | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 158 insertions(+), 6 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index 8fdba4f..a6894cd 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -92,6 +92,7 @@ class Node @namespace = args.namespace ? NS_HTML @parent = args.parent ? null @token = args.token ? null + @flags = args.flags ? {} if args.id? @id = "#{args.id}+" else @@ -101,8 +102,11 @@ class Node @token.flag 'did_self_close' else @flag 'did_self_close', true - flag: -> - # fixfull + flag: (key, value = null) -> + if value? + @flags[key] = value + else + return @flags[key] serialize: (shallow = false, show_ids = false) -> # for unit tests ret = '' switch @type @@ -320,6 +324,7 @@ h_tags = { h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML } +# FIXME namespacify foster_parenting_targets = { table: true tbody: true @@ -328,6 +333,7 @@ foster_parenting_targets = { tr: true } +# FIXME namespacify # all html I presume end_tag_implied = { dd: true @@ -349,6 +355,84 @@ adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML } el_is_special_not_adp = (el) -> return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace +svg_attribute_fixes = { + attributename: 'attributeName' + attributetype: 'attributeType' + basefrequency: 'baseFrequency' + baseprofile: 'baseProfile' + calcmode: 'calcMode' + clippathunits: 'clipPathUnits' + contentscripttype: 'contentScriptType' + contentstyletype: 'contentStyleType' + diffuseconstant: 'diffuseConstant' + edgemode: 'edgeMode' + externalresourcesrequired: 'externalResourcesRequired' + filterres: 'filterRes' + filterunits: 'filterUnits' + glyphref: 'glyphRef' + gradienttransform: 'gradientTransform' + gradientunits: 
'gradientUnits' + kernelmatrix: 'kernelMatrix' + kernelunitlength: 'kernelUnitLength' + keypoints: 'keyPoints' + keysplines: 'keySplines' + keytimes: 'keyTimes' + lengthadjust: 'lengthAdjust' + limitingconeangle: 'limitingConeAngle' + markerheight: 'markerHeight' + markerunits: 'markerUnits' + markerwidth: 'markerWidth' + maskcontentunits: 'maskContentUnits' + maskunits: 'maskUnits' + numoctaves: 'numOctaves' + pathlength: 'pathLength' + patterncontentunits: 'patternContentUnits' + patterntransform: 'patternTransform' + patternunits: 'patternUnits' + pointsatx: 'pointsAtX' + pointsaty: 'pointsAtY' + pointsatz: 'pointsAtZ' + preservealpha: 'preserveAlpha' + preserveaspectratio: 'preserveAspectRatio' + primitiveunits: 'primitiveUnits' + refx: 'refX' + refy: 'refY' + repeatcount: 'repeatCount' + repeatdur: 'repeatDur' + requiredextensions: 'requiredExtensions' + requiredfeatures: 'requiredFeatures' + specularconstant: 'specularConstant' + specularexponent: 'specularExponent' + spreadmethod: 'spreadMethod' + startoffset: 'startOffset' + stddeviation: 'stdDeviation' + stitchtiles: 'stitchTiles' + surfacescale: 'surfaceScale' + systemlanguage: 'systemLanguage' + tablevalues: 'tableValues' + targetx: 'targetX' + targety: 'targetY' + textlength: 'textLength' + viewbox: 'viewBox' + viewtarget: 'viewTarget' + xchannelselector: 'xChannelSelector' + ychannelselector: 'yChannelSelector' + zoomandpan: 'zoomAndPan' +} +adjust_mathml_attributes = (t) -> + for a in t.attrs_a + if a[0] is 'definitionurl' + a[0] = 'definitionURL' + return +adjust_svg_attributes = (t) -> + for a in t.attrs_a + if svg_attribute_fixes[a[0]]? 
+ a[0] = svg_attribute_fixes[a[0]] + return +adjust_foreign_attributes = (t) -> + # fixfull + return + # decode_named_char_ref() # # The list of named character references is _huge_ so ask the browser to decode @@ -1120,7 +1204,8 @@ parse_html = (txt, parse_error_cb = null) -> open_els.unshift el return el # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element - insert_html_element = insert_foreign_element # (token, namespace) -> + insert_html_element = (token) -> + insert_foreign_element token, NS_HTML # http://www.w3.org/TR/html5/syntax.html#insert-a-comment # position should be [node, index_within_children] @@ -1862,15 +1947,70 @@ parse_html = (txt, parse_error_cb = null) -> flag_frameset_ok = false parse_generic_raw_text t return - - # FIXME CONTINUE - + if t.type is TYPE_START_TAG and t.name is 'iframe' + flag_frameset_ok = false + parse_generic_raw_text t + return + if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting)) + parse_generic_raw_text t + return + if t.type is TYPE_START_TAG and t.name is 'select' + reconstruct_afe() + insert_html_element t + flag_frameset_ok = false + if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell + ins_mode = ins_mode_in_select_in_table + else + ins_mode = ins_mode_in_select + return + if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option') + if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML + open_els.shift() + reconstruct_afe() + insert_html_element t + return + if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc') + if is_in_scope 'ruby', NS_HTML + generate_implied_end_tags() + unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML + parse_error() + insert_html_element t + return + if t.type is TYPE_START_TAG and t.name is 'rt' + if is_in_scope 'ruby', NS_HTML + 
generate_implied_end_tags 'rtc' # arg is exception + unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML + parse_error() + insert_html_element t + return + if t.type is TYPE_START_TAG and t.name is 'math' + reconstruct_afe() + adjust_mathml_attributes t + adjust_foreign_attributes t + insert_foreign_element t, NS_MATHML + if t.flag 'self-closing' + open_els.shift() + t.acknowledge_self_closing() + return + if t.type is TYPE_START_TAG and t.name is 'svg' + reconstruct_afe() + adjust_svg_attributes t + adjust_foreign_attributes t + insert_foreign_element t, NS_SVG + if t.flag 'self-closing' + open_els.shift() + t.acknowledge_self_closing() + return + if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr') + parse_error() + return if t.type is TYPE_START_TAG # any other start tag reconstruct_afe() insert_html_element t return if t.type is TYPE_END_TAG # any other end tag in_body_any_other_end_tag t.name + return return ins_mode_in_table_else = (t) -> @@ -3981,6 +4121,18 @@ parse_html = (txt, parse_error_cb = null) -> # Anything else return null + # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state + tok_state_cdata_section = -> + tok_state = tok_state_data + next_gt = txt.indexOf ']]>', cur + if next_gt is -1 + val = txt.substr cur + cur = txt.length + else + val = txt.substr cur, (next_gt - cur) + cur = next_gt + 3 + val = val.replace /\u0000/g, "\ufffd" # global regex so every NUL is replaced, not only the first; fixfull spec doesn't say this + return new_character_token val # fixfull split # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference # Don't set this as a state, just call it -- 1.7.10.4