JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
change parser api (breaks editor)
author Jason Woofenden <jason@jasonwoof.com>
Fri, 12 May 2017 19:26:34 +0000 (15:26 -0400)
committer Jason Woofenden <jason@jasonwoof.com>
Fri, 12 May 2017 19:26:34 +0000 (15:26 -0400)
parser.js
parser_tests.js

index 7df5113..de9e9ed 100644 (file)
--- a/parser.js
+++ b/parser.js
@@ -1,5 +1,5 @@
-// todo remove refs and lens, js, ls
-// run test suite!
+// todo remove unused variables
+// todo remove debug log, or make a way to access it
 
 // Copyright 2015 Jason Woofenden
 // This file implements an HTML5 parser
 
 // Copyright 2015 Jason Woofenden
 // This file implements an HTML5 parser
 //   2: c "next", "after", "lower", "below"
 //   1: b
 //   0: a "end of the list", "current node", "bottommost", "last"
 //   2: c "next", "after", "lower", "below"
 //   1: b
 //   0: a "end of the list", "current node", "bottommost", "last"
+(function () {
+
+var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
 
 if ((typeof module) !== 'undefined' && (module.exports != null)) {
        context = 'module'
 
 if ((typeof module) !== 'undefined' && (module.exports != null)) {
        context = 'module'
-       exports = module.exports
 } else {
        context = 'browser'
        window.peach_parser = {}
 } else {
        context = 'browser'
        window.peach_parser = {}
-       exports = window.peach_parser
 }
 
 from_code_point = function (x) {
 }
 
 from_code_point = function (x) {
@@ -704,7 +705,7 @@ decode_named_char_ref = function (txt) {
 }
 
 parse_html = function (args_html, args) {
 }
 
 parse_html = function (args_html, args) {
-       var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
+       var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, 
temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
        if (args == null) {
                args = {}
        }
        if (args == null) {
                args = {}
        }
@@ -743,7 +744,7 @@ parse_html = function (args_html, args) {
        // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
        // "Noah's Ark clause" but with three
        afe_push = function (new_el) {
        // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
        // "Noah's Ark clause" but with three
        afe_push = function (new_el) {
-               var attrs_match, el, i, j, k, len, matches, ref, ref1, v
+               var attrs_match, el, i, j, k, matches, v
                matches = 0
                for (i = 0; i < afe.length; ++i) {
                        el = afe[i]
                matches = 0
                for (i = 0; i < afe.length; ++i) {
                        el = afe[i]
@@ -1153,7 +1154,7 @@ parse_html = function (args_html, args) {
        //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
        //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
        adoption_agency = function (subject) {
        //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
        //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
        adoption_agency = function (subject) {
-               var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
+               var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
 // this block implements the W3C spec
 //             # 1. If the current node is an HTML element whose tag name is subject,
 //             # then run these substeps:
 // this block implements the W3C spec
 //             # 1. If the current node is an HTML element whose tag name is subject,
 //             # then run these substeps:
@@ -1566,7 +1567,7 @@ parse_html = function (args_html, args) {
        // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
        // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
        adjusted_insertion_location = function (override_target) {
        // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
        // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
        adjusted_insertion_location = function (override_target) {
-               var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, ref, target, target_i
+               var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, target, target_i
                // 1. If there was an override target specified, then let target be the
                // override target.
                if (override_target != null) {
                // 1. If there was an override target specified, then let target be the
                // override target.
                if (override_target != null) {
@@ -2150,7 +2151,7 @@ parse_html = function (args_html, args) {
                }
        }
        ins_mode_in_body = function (t) {
                }
        }
        ins_mode_in_body = function (t) {
-               var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
+               var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
                if (t.type === TYPE_TEXT && t.text === "\u0000") {
                        parse_error()
                        return
                if (t.type === TYPE_TEXT && t.text === "\u0000") {
                        parse_error()
                        return
@@ -6069,17 +6070,10 @@ parse_html = function (args_html, args) {
        return doc.children
 }
 
        return doc.children
 }
 
-exports.parse = parse_html
-exports.Node = Node
-exports.debug_log_reset = debug_log_reset
-exports.debug_log_each = debug_log_each
-exports.TYPE_TAG = TYPE_TAG
-exports.TYPE_TEXT = TYPE_TEXT
-exports.TYPE_COMMENT = TYPE_COMMENT
-exports.TYPE_DOCTYPE = TYPE_DOCTYPE
-exports.NS_HTML = NS_HTML
-exports.NS_MATHML = NS_MATHML
-exports.NS_SVG = NS_SVG
-exports.QUIRKS_NO = QUIRKS_NO
-exports.QUIRKS_LIMITED = QUIRKS_LIMITED
-exports.QUIRKS_YES = QUIRKS_YES
+if (context === 'module') {
+       module.exports = parse_html
+} else {
+       window.peach_parser = parse_html
+}
+
+}).call(this)
index 45b75e0..5a4f9f2 100644 (file)
@@ -7949,12 +7949,12 @@ serialize_els = function (els, prefix) {
        for (i = 0; i < els.length; ++i) {
                el = els[i]
                switch (el.type) {
        for (i = 0; i < els.length; ++i) {
                el = els[i]
                switch (el.type) {
-                       case peach_parser.TYPE_TAG:
+                       case "tag":
                                ret += prefix + "<"
                                ret += prefix + "<"
-                               if (el.namespace === peach_parser.NS_MATHML) {
+                               if (el.namespace === "mathml") {
                                        ret += "math "
                                }
                                        ret += "math "
                                }
-                               if (el.namespace === peach_parser.NS_SVG) {
+                               if (el.namespace === "svg") {
                                        ret += "svg "
                                }
                                ret += el.name + ">\n"
                                        ret += "svg "
                                }
                                ret += el.name + ">\n"
@@ -7967,20 +7967,20 @@ serialize_els = function (els, prefix) {
                                        k = attr_keys[j]
                                        ret += prefix + "  " + k + "=\"" + el.attrs[k] + "\"\n"
                                }
                                        k = attr_keys[j]
                                        ret += prefix + "  " + k + "=\"" + el.attrs[k] + "\"\n"
                                }
-                               if (el.name === 'template' && el.namespace === peach_parser.NS_HTML) {
+                               if (el.name === 'template' && el.namespace === "html") {
                                        ret += prefix + "  content\n"
                                        ret += serialize_els(el.children, prefix + "    ")
                                } else {
                                        ret += serialize_els(el.children, prefix + "  ")
                                }
                        break
                                        ret += prefix + "  content\n"
                                        ret += serialize_els(el.children, prefix + "    ")
                                } else {
                                        ret += serialize_els(el.children, prefix + "  ")
                                }
                        break
-                       case peach_parser.TYPE_TEXT:
+                       case "text":
                                ret += prefix + "\"" + el.text + "\"\n"
                        break
                                ret += prefix + "\"" + el.text + "\"\n"
                        break
-                       case peach_parser.TYPE_COMMENT:
+                       case "comment":
                                ret += prefix + "<!-- " + el.text + " -->\n"
                        break
                                ret += prefix + "<!-- " + el.text + " -->\n"
                        break
-                       case peach_parser.TYPE_DOCTYPE:
+                       case "doctype":
                                ret += prefix + "<!DOCTYPE " + el.name
                                if (((el.public_identifier != null) && el.public_identifier.length > 0) || ((el.system_identifier != null) && el.system_identifier.length > 0)) {
                                        ret += " \"" + ((ref = el.public_identifier) != null ? ref : '') + "\""
                                ret += prefix + "<!DOCTYPE " + el.name
                                if (((el.public_identifier != null) && el.public_identifier.length > 0) || ((el.system_identifier != null) && el.system_identifier.length > 0)) {
                                        ret += " \"" + ((ref = el.public_identifier) != null ? ref : '') + "\""
@@ -7998,20 +7998,16 @@ serialize_els = function (els, prefix) {
 test_results = { passed: 0, failed: 0 }
 test_parser = function (args) {
        var parse_errors, parsed, prev_node_id, serialized
 test_results = { passed: 0, failed: 0 }
 test_parser = function (args) {
        var parse_errors, parsed, prev_node_id, serialized
-       peach_parser.debug_log_reset()
        parse_errors = []
        args.error_cb = function (i) {
                parse_errors.push(i)
        }
        prev_node_id = 0 // reset counter
        parse_errors = []
        args.error_cb = function (i) {
                parse_errors.push(i)
        }
        prev_node_id = 0 // reset counter
-       parsed = peach_parser.parse(args.html, args)
+       parsed = peach_parser(args.html, args)
        serialized = serialize_els(parsed)
        if (serialized !== args.expected) {
                test_results.failed += 1
                if (test_results.failed === 1) {
        serialized = serialize_els(parsed)
        if (serialized !== args.expected) {
                test_results.failed += 1
                if (test_results.failed === 1) {
-                       peach_parser.debug_log_each(function (str) {
-                               console.log(str)
-                       })
                        console.log("FAILED: \"" + args.name + "\"")
                        console.log("      Input: " + args.html)
                        if (args.fragment != null) {
                        console.log("FAILED: \"" + args.name + "\"")
                        console.log("      Input: " + args.html)
                        if (args.fragment != null) {