loop
if node_i is open_els.length - 1
last = true
- # fixfull (fragment case)
-
+ if flag_fragment_parsing
+ node = context_element
# 4. If node is a select element, run these substeps:
if node.name is 'select' and node.namespace is NS_HTML
# 1. If last is true, jump to the step below labeled done.
if t.type is TYPE_START_TAG and t.name is 'html'
el = token_to_element t, NS_HTML, doc
doc.children.push el
+ el.document = doc
open_els.unshift(el)
# fixfull (big paragraph in spec about manifest, fragment, urls, etc)
ins_mode = ins_mode_before_head
# Anything else
el = token_to_element new_open_tag('html'), NS_HTML, doc
doc.children.push el
- el.parent = doc
+ el.document = doc
open_els.unshift el
# ?fixfull browsing context
ins_mode = ins_mode_before_head
# see comments on TYPE_TAG/etc for the structure of this data
txt = args.html
cur = 0
- doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+ doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
+ fragment_root = null # fragment parsing algorithm returns children of this
open_els = []
afe = [] # active formatting elements
template_ins_modes = []
temporary_buffer = null
pending_table_character_tokens = []
head_element_pointer = null
- flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
- context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+ flag_fragment_parsing = false
+ context_element = null
prev_node_id = 0 # just for debugging
# tokenizer initialization
tok_state = tok_state_data
- # text pre-processing
- # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
- txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
- txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
+ parse_init = ->
+ # one-time setup, called once just before parse_main_loop; first case: fragment parsing where args.fragment is a text description of the context element
+ if args.fragment?
+ # this handles the fragment from the tests in the format described here (an optional 'math ' or 'svg ' prefix selects the namespace of the context element):
+ # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
+ f = args.fragment
+ ns = NS_HTML
+ if f.substr(0, 5) is 'math '
+ f = f.substr 5
+ ns = NS_MATHML
+ else if f.substr(0, 4) is 'svg '
+ f = f.substr 4
+ ns = NS_SVG
+ t = new_open_tag f
+ context_element = token_to_element t, ns
+ context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
+ context_element.document.flag 'quirks mode', QUIRKS_NO
+ # fragment parsing (Node arg): args.context, when present, replaces any context element built from args.fragment above
+ if args.context?
+ context_element = args.context
+
+ # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+ # fragment parsing algorithm (only runs when one of the two args above supplied a context element)
+ if context_element?
+ flag_fragment_parsing = true
+ doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+ # search up the tree from context, to try to find its document,
+ # because this file only puts a "document" property on the root
+ # element. If one is found, its 'quirks mode' flag is copied onto the new doc.
+ old_doc = null
+ el = context_element
+ loop
+ if el.document?
+ old_doc = el.document
+ break
+ if el.parent
+ el = el.parent
+ else
+ break
+ if old_doc
+ doc.flag 'quirks mode', old_doc.flag 'quirks mode'
+ # set tok_state (the initial tokenizer state) from the context element's name; only HTML-namespace context elements change it
+ if context_element.namespace is NS_HTML
+ switch context_element.name
+ when 'title', 'textarea'
+ tok_state = tok_state_rcdata
+ when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
+ tok_state = tok_state_rawtext
+ when 'script'
+ tok_state = tok_state_script_data
+ when 'noscript'
+ if flag_scripting
+ tok_state = tok_state_rawtext
+ when 'plaintext'
+ tok_state = tok_state_plaintext
+ fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
+ doc.children.push fragment_root
+ fragment_root.document = doc
+ open_els = [fragment_root]
+ if context_element.name is 'template' and context_element.namespace is NS_HTML
+ template_ins_modes.unshift ins_mode_in_template
+ # fixfull create token for context (it should have its original one already)
+ reset_ins_mode()
+ # set form_element_pointer to the first HTML form element found walking from the context element up through its parents... in the foreign doc?!
+ el = context_element
+ loop
+ if el.name is 'form' and el.namespace is NS_HTML
+ form_element_pointer = el
+ break
+ if el.parent
+ el = el.parent
+ else
+ break
+
+ # text pre-processing: normalize line endings in txt to "\n"
+ # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+ txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # CRLF pairs first, so they become a single newline (fixfull spec doesn't say this)
+ txt = txt.replace(new RegExp("\r", 'g'), "\n") # then any remaining lone CR (fixfull spec doesn't say this)
- if args.name is "webkit01.dat #12"
- console.log "hi"
- # proccess input
# main parse loop: while flag_parsing is set, hand each non-null token t to process_token; see http://www.w3.org/TR/html5/syntax.html#tree-construction
parse_main_loop = ->
while flag_parsing
if t?
process_token t
# fixfull parse error if has self-closing flag, but it wasn't acknowledged
+ return
+ parse_init()
parse_main_loop()
+
+ if flag_fragment_parsing
+ return fragment_root.children
return doc.children
serialize_els = (els, shallow, show_ids) ->