#
# Deviations from that spec:
#
-# Purposeful: search this file for "WTAG"
+# Purposeful: search this file for "WHATWG"
#
# Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
@id = "#{++prev_node_id}"
acknowledge_self_closing: ->
if @token?
- @token.flag 'did_self_close'
+ @token.flag 'did_self_close', true
else
@flag 'did_self_close', true
flag: (key, value = null) ->
img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
- menu:NS_HTML,menuitem:NS_HTML, # WATWG adds these
+ menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
diffuseconstant: 'diffuseConstant'
edgemode: 'edgeMode'
externalresourcesrequired: 'externalResourcesRequired'
- filterres: 'filterRes'
+ # WHATWG removes this: filterres: 'filterRes'
filterunits: 'filterUnits'
glyphref: 'glyphRef'
gradienttransform: 'gradientTransform'
ychannelselector: 'yChannelSelector'
zoomandpan: 'zoomAndPan'
}
+foreign_attr_fixes = {
+ 'xlink:actuate': 'xlink actuate'
+ 'xlink:arcrole': 'xlink arcrole'
+ 'xlink:href': 'xlink href'
+ 'xlink:role': 'xlink role'
+ 'xlink:show': 'xlink show'
+ 'xlink:title': 'xlink title'
+ 'xlink:type': 'xlink type'
+ 'xml:base': 'xml base'
+ 'xml:lang': 'xml lang'
+ 'xml:space': 'xml space'
+ 'xmlns': 'xmlns'
+ 'xmlns:xlink': 'xmlns xlink'
+}
adjust_mathml_attributes = (t) ->
for a in t.attrs_a
if a[0] is 'definitionurl'
return
adjust_foreign_attributes = (t) ->
# fixfull
+ for a in t.attrs_a
+ if foreign_attr_fixes[a[0]]?
+ a[0] = foreign_attr_fixes[a[0]]
return
# decode_named_char_ref()
standard_scopers = {
applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
- template: NS_HTML, mi: NS_MATHML,
+ template: NS_HTML,
- mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
- 'annotation-xml': NS_MATHML,
+ mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
+ mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
debug_log "tree: #{serialize_els doc.children, false, true}"
debug_log "open_els: #{serialize_els open_els, true, true}"
debug_log "afe: #{serialize_els afe, true, true}"
+# this block implements the W3C spec
+# # 1. If the current node is an HTML element whose tag name is subject,
+# # then run these substeps:
+# #
+# # 1. Let element be the current node.
+# #
+# # 2. Pop element off the stack of open elements.
+# #
+# # 3. If element is also in the list of active formatting elements,
+# # remove the element from the list.
+# #
+# # 4. Abort the adoption agency algorithm.
+# if open_els[0].name is subject and open_els[0].namespace is NS_HTML
+# el = open_els.shift()
+# # remove it from the list of active formatting elements (if found)
+# for t, i in afe
+# if t is el
+# afe.splice i, 1
+# break
+# debug_log "aaa: starting off with subject on top of stack, exiting"
+# return
+# WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
+ # If the current node is an HTML element whose tag name is subject, and
+ # the current node is not in the list of active formatting elements,
+ # then pop the current node off the stack of open elements, and abort
+ # these steps.
if open_els[0].name is subject and open_els[0].namespace is NS_HTML
- el = open_els[0]
- open_els.shift()
+ debug_log "aaa: starting off with subject on top of stack, exiting"
# remove it from the list of active formatting elements (if found)
- for t, i in afe
- if t is el
- afe.splice i, 1
+ in_afe = false
+ for el, i in afe
+ if el is open_els[0]
+ in_afe = true
break
- debug_log "aaa: starting off with subject on top of stack, exiting"
- return
+ unless in_afe
+ debug_log "aaa: ...and not in afe, aaa done"
+ open_els.shift()
+ return
+ # fall through
+# END WHATWG
outer = 0
loop
if outer >= 8
ins_mode t
return
if is_mathml_text_integration_point(acn)
- if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
+ if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
ins_mode t
return
if t.type is TYPE_TEXT
parse_error()
open_els.unshift head_element_pointer
ins_mode_in_head t
- for el, i of open_els
+ for el, i in open_els
if el is head_element_pointer
open_els.splice i, 1
return
parse_error()
return if template_tag_is_open()
root_attrs = open_els[open_els.length - 1].attrs
- for a of t.attrs_a
+ for a in t.attrs_a
root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
return
return unless second.name is 'body'
return if template_tag_is_open()
flag_frameset_ok = false
- for a of t.attrs_a
+ for a in t.attrs_a
second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
return
if t.type is TYPE_START_TAG and t.name is 'frameset'
# parse_error()
# insert_html_element t
# return
-# below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
+# below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
if is_in_scope 'ruby', NS_HTML
generate_implied_end_tags()
parse_error()
insert_html_element t
return
-# end WATWG chunk
+# end WHATWG chunk
if t.type is TYPE_START_TAG and t.name is 'math'
reconstruct_afe()
adjust_mathml_attributes t
ins_mode_in_body t
return
if t.type is TYPE_COMMENT
- insert_comment t, [open_els[0], open_els[0].children.length]
+ first = open_els[open_els.length - 1]
+ insert_comment t, [first, first.children.length]
return
if t.type is TYPE_DOCTYPE
parse_error()
ins_mode_in_body t
return
if t.type is TYPE_END_TAG and t.name is 'html'
- # fixfull fragment case
+ if flag_fragment_parsing
+ parse_error()
+ return
ins_mode = ins_mode_after_after_body
return
if t.type is TYPE_EOF
ins_mode_in_body t
return
if t.type is TYPE_END_TAG and t.name is 'html'
- insert_mode = ins_mode_after_after_frameset
+ ins_mode = ins_mode_after_after_frameset
return
if t.type is TYPE_START_TAG and t.name is 'noframes'
ins_mode_in_head t
# Anything else
parse_error()
ins_mode = ins_mode_in_body
+ process_token t
return
# 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
if t.name is 'script'
t.acknowledge_self_closing()
in_foreign_content_end_script()
+ # fixfull
else
open_els.shift()
t.acknowledge_self_closing()
return
loop # is this safe?
open_els.shift()
- cn = open_els[0]
- if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
+ if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
break
process_token t
return
in_foreign_content_end_script()
return
if t.type is TYPE_END_TAG
- if open_els[0].name.toLowerCase() isnt t.name
+ i = 0
+ node = open_els[i]
+ if node.name.toLowerCase() isnt t.name
parse_error()
- for node in open_els
+ loop
if node is open_els[open_els.length - 1]
return
if node.name.toLowerCase() is t.name
el = open_els.shift()
if el is node
return
+ i += 1
+ node = open_els[i]
if node.namespace is NS_HTML
break
ins_mode t # explicitly call HTML insertion mode
# 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
tok_state_tag_open = ->
- switch c = txt.charAt(cur++)
- when '!'
- tok_state = tok_state_markup_declaration_open
- when '/'
- tok_state = tok_state_end_tag_open
- when '?'
- parse_error()
- tok_cur_tag = new_comment_token '?'
- tok_state = tok_state_bogus_comment
- else
- if is_lc_alpha(c)
- tok_cur_tag = new_open_tag c
- tok_state = tok_state_tag_name
- else if is_uc_alpha(c)
- tok_cur_tag = new_open_tag c.toLowerCase()
- tok_state = tok_state_tag_name
- else
- parse_error()
- tok_state = tok_state_data
- cur -= 1 # we didn't parse/handle the char after <
- return new_text_node '<'
- return null
+ c = txt.charAt(cur++)
+ if c is '!'
+ tok_state = tok_state_markup_declaration_open
+ return
+ if c is '/'
+ tok_state = tok_state_end_tag_open
+ return
+ if is_uc_alpha(c)
+ tok_cur_tag = new_open_tag c.toLowerCase()
+ tok_state = tok_state_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_open_tag c
+ tok_state = tok_state_tag_name
+ return
+ if c is '?'
+ parse_error()
+ tok_cur_tag = new_comment_token '?' # FIXME right?
+ tok_state = tok_state_bogus_comment
+ return
+ # Anything else
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # we didn't parse/handle the char after <
+ return new_text_node '<'
# 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
tok_state_end_tag_open = ->
- switch c = txt.charAt(cur++)
- when '>'
- parse_error()
- tok_state = tok_state_data
- when '' # EOF
- parse_error()
- tok_state = tok_state_data
- return new_text_node '</'
- else
- if is_uc_alpha(c)
- tok_cur_tag = new_end_tag c.toLowerCase()
- tok_state = tok_state_tag_name
- else if is_lc_alpha(c)
- tok_cur_tag = new_end_tag c
- tok_state = tok_state_tag_name
- else
- parse_error()
- tok_cur_tag = new_comment_token '/'
- tok_state = tok_state_bogus_comment
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ tok_state = tok_state_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ tok_state = tok_state_tag_name
+ return
+ if c is '>'
+ parse_error()
+ tok_state = tok_state_data
+ return
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ return new_text_node '</'
+ # Anything else
+ parse_error()
+ tok_cur_tag = new_comment_token c
+ tok_state = tok_state_bogus_comment
return null
# 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
# Anything else
tok_state = tok_state_script_data_escaped
cur -= 1 # Reconsume
- return new_character_token c
+ return new_character_token '<'
# 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
tok_state_script_data_escaped_end_tag_open = ->
tok_state_self_closing_start_tag = ->
c = txt.charAt(cur++)
if c is '>'
- tok_cur_tag.flag 'self-closing'
+ tok_cur_tag.flag 'self-closing', true
tok_state = tok_state_data
return tok_cur_tag
if c is ''
head_element_pointer = null
flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+ prev_node_id = 0 # just for debugging
# tokenizer initialization
tok_state = tok_state_data
txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
- if args.name is "plain-text-unsafe.dat #4"
+ if args.name is "tests18.dat #17"
console.log "hi"
# process input
# http://www.w3.org/TR/html5/syntax.html#tree-construction