JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.4.2
[ckeditor.git] / _source / core / htmlparser.js
1 /*\r
2 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * Creates a {@link CKEDITOR.htmlParser} class instance.
 * @class Provides an "event like" system to parse strings of HTML data.
 * Replace the on* handlers of an instance with your own functions and
 * then call parse(); the handlers are invoked in document order.
 * @example
 * var parser = new CKEDITOR.htmlParser();
 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 *     {
 *         alert( tagName );
 *     };
 * parser.parse( '<p>Some <b>text</b>.</p>' );
 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		/*
		 Tokenizer used by parse(). Each match is one closing tag, one
		 comment or one opening tag; the capture groups are:
			1 : tag name of a closing tag.
			2 : text of a comment.
			3 : tag name of an opening tag. The slash is excluded so that
			    "<br/>" yields "br" and leaves the "/" to group 4.
			4 : attributes part of an opening tag (may end with "/").
		 The attributes part consumes one plain character per iteration:
		 repeating a "one or more" group made inputs such as "<a xxxx..."
		 with no closing ">" backtrack exponentially.
		 */
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>/]+)\\s*((?:[^"\'>]|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
	};
};

(function()
{
	// Matches one attribute: name (1), then a double quoted (2), single
	// quoted (3) or unquoted (4) value, or no value at all.
	var attribsRegex	= /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
		// Boolean attributes: when found without a value, the value
		// reported is the attribute name itself.
		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	/**
	 * Converts the attributes part of an opening tag into an object.
	 * @param {String} attribsPart The raw text found after the tag name.
	 * @returns {Object} Lowercased attribute names mapped to their values.
	 */
	function parseAttributes( attribsPart )
	{
		var attribs = {},
			attribMatch;

		if ( !attribsPart )
			return attribs;

		// The regex is shared and global; always scan from the start.
		attribsRegex.lastIndex = 0;

		while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
		{
			var attName = attribMatch[ 1 ].toLowerCase(),
				attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

			if ( !attValue && emptyAttribs[ attName ] )
				attribs[ attName ] = attName;
			else
				attribs[ attName ] = attValue;
		}

		return attribs;
	}

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name and its
		 *		value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 *		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagOpen	: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagClose	: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *     {
		 *         alert( text );  // e.g. "Hello"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onText		: function() {},

		/**
		 * Function to be fired when a CDATA section is found, i.e. the raw
		 * content of an element listed in CKEDITOR.dtd.$cdata. This function
		 * should be overridden when using this class.
		 * @param {String} cdata The CDATA that has been found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onCDATA = function( cdata )
		 *     {
		 *         alert( cdata );  // e.g. "var hello;"
		 *     };
		 * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );
		 */
		onCDATA		: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *     {
		 *         alert( comment );  // e.g. " Example "
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onComment	: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0,
				cdata;	// The collected data inside a CDATA section.

			for ( ; ; )
			{
				// The regex is global and lives on the instance. Pin the scan
				// position on every step, so that a previous parse aborted by
				// a throwing handler, or a handler calling parse() again,
				// cannot make this loop skip or re-read parts of the input.
				partsRegex.lastIndex = nextIndex;

				parts = partsRegex.exec( html );
				if ( !parts )
					break;

				// Anything between the previous token and this one is text.
				var tagIndex = parts.index;
				if ( tagIndex > nextIndex )
				{
					var text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match for opening/closing tags and comments.
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				 */

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					tagName = tagName.toLowerCase();

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
					{
						// Send the CDATA data.
						this.onCDATA( cdata.join( '' ) );
						cdata = null;
					}

					// Closers found inside CDATA are raw content (handled below).
					if ( !cdata )
					{
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata )
				{
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					tagName = tagName.toLowerCase();

					// There are some tag names that can break things, so let's
					// simply ignore them when parsing. (#5224)
					if ( /="/.test( tagName ) )
						continue;

					// A trailing slash is captured at the end of the attributes part.
					var attribsPart = parts[ 4 ],
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					this.onTagOpen( tagName, parseAttributes( attribsPart ), selfClosing );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment
				if ( ( tagName = parts[ 2 ] ) )
					this.onComment( tagName );
			}

			var remaining = html.substring( nextIndex );

			if ( cdata )
			{
				// The input ended inside a CDATA element: report what has been
				// collected instead of dropping it.
				cdata.push( remaining );
				remaining = cdata.join( '' );

				if ( remaining )
					this.onCDATA( remaining );
			}
			else if ( remaining )
				this.onText( remaining );
		}
	};
})();