JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.0
[ckeditor.git] / _source / core / htmlparser.js
diff --git a/_source/core/htmlparser.js b/_source/core/htmlparser.js
new file mode 100644 (file)
index 0000000..e34fcb2
--- /dev/null
@@ -0,0 +1,212 @@
+/*\r
+Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.\r
+For licensing, see LICENSE.html or http://ckeditor.com/license\r
+*/\r
+\r
+/**\r
+ * HTML text parser.\r
+ * @constructor\r
+ * @example\r
+ */\r
+CKEDITOR.htmlParser = function()\r
+{\r
+       this._ =\r
+       {\r
+               htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:[^"\'>]+)|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )\r
+       };\r
+};\r
+\r
+(function()\r
+{\r
+       var attribsRegex        = /([\w:]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,\r
+               emptyAttribs    = {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};\r
+\r
+       CKEDITOR.htmlParser.prototype =\r
+       {\r
+               /**\r
+                * Function to be fired when a tag opener is found. This function\r
+                * should be overridden when using this class.\r
+                * @param {String} tagName The tag name. The name is guaranteed to be\r
+                *              lowercased.\r
+                * @param {Object} attributes An object containing all tag attributes. Each\r
+                *              property in this object represents an attribute name and its\r
+                *              value is the attribute value.\r
+                * @param {Boolean} selfClosing true if the tag closes itself, false if the\r
+                *              tag doesn't.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * parser.onTagOpen = function( tagName, attributes, selfClosing )\r
+                *     {\r
+                *         alert( tagName );  // e.g. "b"\r
+                *     };\r
+                * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );\r
+                */\r
+               onTagOpen       : function() {},\r
+\r
+               /**\r
+                * Function to be fired when a tag closer is found. This function\r
+                * should be overridden when using this class.\r
+                * @param {String} tagName The tag name. The name is guaranteed to be\r
+                *              lowercased.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * parser.onTagClose = function( tagName )\r
+                *     {\r
+                *         alert( tagName );  // e.g. "b"\r
+                *     };\r
+                * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );\r
+                */\r
+               onTagClose      : function() {},\r
+\r
+               /**\r
+                * Function to be fired when text is found. This function\r
+                * should be overridden when using this class.\r
+                * @param {String} text The text found.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * parser.onText = function( text )\r
+                *     {\r
+                *         alert( text );  // e.g. "Hello"\r
+                *     };\r
+                * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );\r
+                */\r
+               onText          : function() {},\r
+\r
+               /**\r
+                * Function to be fired when a CDATA section is found. This function\r
+                * should be overridden when using this class.\r
+                * @param {String} cdata The CDATA found.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * parser.onCDATA = function( cdata )\r
+                *     {\r
+                *         alert( cdata );  // e.g. "var hello;"\r
+                *     };\r
+                * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );\r
+                */\r
+               onCDATA         : function() {},\r
+\r
+               /**\r
+                * Function to be fired when a comment is found. This function\r
+                * should be overridden when using this class.\r
+                * @param {String} comment The comment text.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * parser.onComment = function( comment )\r
+                *     {\r
+                *         alert( comment );  // e.g. " Example "\r
+                *     };\r
+                * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );\r
+                */\r
+               onComment       : function() {},\r
+\r
+               /**\r
+                * Parses text, looking for HTML tokens, like tag openers or closers,\r
+                * or comments. This function fires the onTagOpen, onTagClose, onText\r
+                * and onComment function during its execution.\r
+                * @param {String} html The HTML to be parsed.\r
+                * @example\r
+                * var parser = new CKEDITOR.htmlParser();\r
+                * // The onTagOpen, onTagClose, onText and onComment should be overriden\r
+                * // at this point.\r
+                * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );\r
+                */\r
+               parse : function( html )\r
+               {\r
+                       var parts,\r
+                               tagName,\r
+                               nextIndex = 0,\r
+                               cdata;  // The collected data inside a CDATA section.\r
+\r
+                       while ( ( parts = this._.htmlPartsRegex.exec( html ) ) )\r
+                       {\r
+                               var tagIndex = parts.index;\r
+                               if ( tagIndex > nextIndex )\r
+                               {\r
+                                       var text = html.substring( nextIndex, tagIndex );\r
+\r
+                                       if ( cdata )\r
+                                               cdata.push( text );\r
+                                       else\r
+                                               this.onText( text );\r
+                               }\r
+\r
+                               nextIndex = this._.htmlPartsRegex.lastIndex;\r
+\r
+                               /*\r
+                                "parts" is an array with the following items:\r
+                                       0 : The entire match for opening/closing tags and comments.\r
+                                       1 : Group filled with the tag name for closing tags.\r
+                                       2 : Group filled with the comment text.\r
+                                       3 : Group filled with the tag name for opening tags.\r
+                                       4 : Group filled with the attributes part of opening tags.\r
+                                */\r
+\r
+                               // Closing tag\r
+                               if ( ( tagName = parts[ 1 ] ) )\r
+                               {\r
+                                       tagName = tagName.toLowerCase();\r
+\r
+                                       if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )\r
+                                       {\r
+                                               // Send the CDATA data.\r
+                                               this.onCDATA( cdata.join('') );\r
+                                               cdata = null;\r
+                                       }\r
+\r
+                                       if ( !cdata )\r
+                                       {\r
+                                               this.onTagClose( tagName );\r
+                                               continue;\r
+                                       }\r
+                               }\r
+\r
+                               // If CDATA is enabled, just save the raw match.\r
+                               if ( cdata )\r
+                               {\r
+                                       cdata.push( parts[ 0 ] );\r
+                                       continue;\r
+                               }\r
+\r
+                               // Opening tag\r
+                               if ( ( tagName = parts[ 3 ] ) )\r
+                               {\r
+                                       tagName = tagName.toLowerCase();\r
+                                       var attribs = {},\r
+                                               attribMatch,\r
+                                               attribsPart = parts[ 4 ],\r
+                                               selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );\r
+\r
+                                       if ( attribsPart )\r
+                                       {\r
+                                               while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )\r
+                                               {\r
+                                                       var attName = attribMatch[1].toLowerCase(),\r
+                                                               attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';\r
+\r
+                                                       if ( !attValue && emptyAttribs[ attName ] )\r
+                                                               attribs[ attName ] = attName;\r
+                                                       else\r
+                                                               attribs[ attName ] = attValue;\r
+                                               }\r
+                                       }\r
+\r
+                                       this.onTagOpen( tagName, attribs, selfClosing );\r
+\r
+                                       // Open CDATA mode when finding the appropriate tags.\r
+                                       if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )\r
+                                               cdata = [];\r
+\r
+                                       continue;\r
+                               }\r
+\r
+                               // Comment\r
+                               if( ( tagName = parts[ 2 ] ) )\r
+                                       this.onComment( tagName );\r
+                       }\r
+\r
+                       if ( html.length > nextIndex )\r
+                               this.onText( html.substring( nextIndex, html.length ) );\r
+               }\r
+       };\r
+})();\r