From: Jason Woofenden Date: Mon, 14 Dec 2015 20:21:24 +0000 (-0500) Subject: parse end tags, close tags with proper nesting X-Git-Url: https://jasonwoof.com/gitweb/?a=commitdiff_plain;h=50c30e8b7c0663890369776992b1f57876e6226e;p=peach-html5-editor.git parse end tags, close tags with proper nesting --- diff --git a/parse-html.coffee b/parse-html.coffee index a9e1ce0..db1837b 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -32,7 +32,8 @@ TYPE_WHITESPACE = 2 TYPE_COMMENT = 3 # the following types are emited by the tokenizer, but shouldn't end up in the tree: TYPE_OPEN_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children] -TYPE_EOF = 5 +TYPE_CLOSE_TAG = 5 # name +TYPE_EOF = 6 lc_alpha = "abcdefghijklmnopqrstuvwxqz" uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXQZ" @@ -210,6 +211,28 @@ parse_html = (txt) -> return [TYPE_TEXT, '<'] return null + # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state + tok_state_end_tag_open = -> + switch c = txt.charAt(cur++) + when '>' + parse_error() + tok_state = tok_state_data + when '' # EOF + parse_error() + tok_state = tok_state_data + return [TYPE_TEXT, ' -1 + tok_cur_tag = [TYPE_CLOSE_TAG, c.toLowerCase(), [], []] + tok_state = tok_state_tag_name + else if lc_alpha.indexOf(c) > -1 + tok_cur_tag = [TYPE_CLOSE_TAG, c, [], []] + tok_state = tok_state_tag_name + else + parse_error() + tok_state = tok_state_bogus_comment + return null + # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state tok_state_tag_name = -> switch c = txt.charAt(cur++) @@ -503,12 +526,22 @@ parse_html = (txt) -> open_tags[0][3].push t open_tags.unshift t # TODO implement formatting elements thing + when TYPE_CLOSE_TAG + # FIXME this is just a hack for now + if open_tags.length < 2 + parse_error() + return + if open_tags[0][1] isnt t[1] + parse_error() + # fall through and close something anyway + open_tags.shift() when TYPE_EOF return # TODO implement close tags # TODO implement self-closing tags else console.log "UNIMPLEMENTED tag type: #{t[0]}" + return # tree constructor initialization # see comments on TYPE_TAG/etc for the structure of this data @@ -550,3 +583,4 @@ test_equals "open tag with attributes of various quotings", html_to_json, "foobar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]' test_equals "attribute entity exceptions sq", html_to_json, "foobar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]' test_equals "attribute entity exceptions uq", html_to_json, "foobar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]' +test_equals "matching closing tags", html_to_json, "foohi
1
foo
2
bar", '[[1,"foo"],[0,"a",{"href":"hi"},[[1,"hi"]]],[0,"div",{},[[1,"1"],[0,"div",{},[[1,"foo"]]],[1,"2"]]],[1,"bar"]]'