JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
acd5ef5efb4752cd1951a2930d9b9883f699c0e2
[ckeditor.git] / _source / core / htmlparser.js
1 /*\r
2 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * Callback-based HTML text parser. Create an instance, override the "on*"
 * handlers (onTagOpen, onTagClose, onText, onCDATA, onComment) and call
 * parse() with the HTML string to be processed.
 * @constructor
 * @example
 * var parser = new CKEDITOR.htmlParser();
 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 *     {
 *         alert( tagName );  // e.g. "b"
 *     };
 * parser.parse( "&lt;b&gt;Hello&lt;/b&gt;" );
 */
CKEDITOR.htmlParser = function()
{
        this._ =
        {
                // Matches, in a single pass, closing tags, comments and opening tags.
                // Capturing groups:
                //   1 : tag name of a closing tag.
                //   2 : text of a comment.
                //   3 : tag name of an opening tag. The slash is excluded from the
                //       name so that "<br/>" yields "br" rather than "br/".
                //   4 : attributes part of an opening tag, including any trailing
                //       "/" of a self-closing tag.
                // The attributes group consumes a single unquoted character per
                // iteration (instead of a nested "+" inside "*") to avoid
                // exponential backtracking on tags that are never closed by ">".
                // A new RegExp object is created for each instance because the
                // "g" flag makes it stateful (lastIndex).
                htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S\\s]*?)-->)|(?:([^\\/\\s>]+)\\s*((?:[^"\'>]|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
        };
};
18 \r
(function()
{
        // Matches one attribute inside the attributes part of an opening tag.
        // Groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
        // 4 = unquoted value. An attribute without value is also matched.
        var attribsRegex        = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
                // Boolean attributes: when found without value, their own name is
                // used as the value (e.g. checked="checked").
                emptyAttribs    = {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1},
                // Used for own-property checks, so names inherited from
                // Object.prototype (like "constructor") are not mistaken for
                // boolean attributes.
                hasOwn          = Object.prototype.hasOwnProperty;

        CKEDITOR.htmlParser.prototype =
        {
                /**
                 * Function to be fired when a tag opener is found. This function
                 * should be overridden when using this class.
                 * @param {String} tagName The tag name. The name is guaranteed to be
                 *              lowercased.
                 * @param {Object} attributes An object containing all tag attributes. Each
                 *              property in this object represents an attribute name
                 *              (lowercased) and its value is the attribute value.
                 * @param {Boolean} selfClosing true if the tag closes itself, false if the
                 *              tag doesn't.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * parser.onTagOpen = function( tagName, attributes, selfClosing )
                 *     {
                 *         alert( tagName );  // e.g. "b"
                 *     };
                 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
                 */
                onTagOpen       : function() {},

                /**
                 * Function to be fired when a tag closer is found. This function
                 * should be overridden when using this class.
                 * @param {String} tagName The tag name. The name is guaranteed to be
                 *              lowercased.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * parser.onTagClose = function( tagName )
                 *     {
                 *         alert( tagName );  // e.g. "b"
                 *     };
                 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
                 */
                onTagClose      : function() {},

                /**
                 * Function to be fired when text is found. This function
                 * should be overridden when using this class.
                 * @param {String} text The text found.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * parser.onText = function( text )
                 *     {
                 *         alert( text );  // e.g. "Hello"
                 *     };
                 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
                 */
                onText          : function() {},

                /**
                 * Function to be fired when a CDATA section (the raw content of an
                 * element listed in CKEDITOR.dtd.$cdata) is found. This function
                 * should be overridden when using this class.
                 * @param {String} cdata The CDATA that has been found.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * parser.onCDATA = function( cdata )
                 *     {
                 *         alert( cdata );  // e.g. "var hello;"
                 *     };
                 * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );
                 */
                onCDATA         : function() {},

                /**
                 * Function to be fired when a comment is found. This function
                 * should be overridden when using this class.
                 * @param {String} comment The comment text. It is an empty string
                 *              for empty comments.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * parser.onComment = function( comment )
                 *     {
                 *         alert( comment );  // e.g. " Example "
                 *     };
                 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
                 */
                onComment       : function() {},

                /**
                 * Parses text, looking for HTML tokens, like tag openers or closers,
                 * or comments. This function fires the onTagOpen, onTagClose, onText,
                 * onCDATA and onComment functions during its execution.
                 *
                 * After the opener of an element listed in CKEDITOR.dtd.$cdata,
                 * everything up to the next closer of such an element is collected
                 * as raw data and sent to onCDATA. If the input ends before that
                 * closer is found, the data collected so far is still sent to
                 * onCDATA.
                 * @param {String} html The HTML to be parsed.
                 * @example
                 * var parser = new CKEDITOR.htmlParser();
                 * // The onTagOpen, onTagClose, onText, onCDATA and onComment should
                 * // be overridden at this point.
                 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
                 */
                parse : function( html )
                {
                        var partsRegex = this._.htmlPartsRegex,
                                parts,
                                tagName,
                                nextIndex = 0,
                                cdata;  // The collected data inside a CDATA section.

                        // The regex is global, so it remembers where the last search
                        // stopped. Restart it explicitly: a previous parse() call
                        // interrupted by an exception thrown from a handler would
                        // otherwise make this one start in the middle of the string.
                        partsRegex.lastIndex = 0;

                        while ( ( parts = partsRegex.exec( html ) ) )
                        {
                                // Everything between the previous token and this one is
                                // text (or raw data, when inside a CDATA section).
                                var tagIndex = parts.index;
                                if ( tagIndex > nextIndex )
                                {
                                        var text = html.substring( nextIndex, tagIndex );

                                        if ( cdata )
                                                cdata.push( text );
                                        else
                                                this.onText( text );
                                }

                                nextIndex = partsRegex.lastIndex;

                                /*
                                 "parts" is an array with the following items:
                                        0 : The entire match for opening/closing tags and comments.
                                        1 : Group filled with the tag name for closing tags.
                                        2 : Group filled with the comment text.
                                        3 : Group filled with the tag name for opening tags.
                                        4 : Group filled with the attributes part of opening tags.
                                 */

                                // Closing tag
                                if ( ( tagName = parts[ 1 ] ) )
                                {
                                        tagName = tagName.toLowerCase();

                                        if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
                                        {
                                                // Send the CDATA data.
                                                this.onCDATA( cdata.join('') );
                                                cdata = null;
                                        }

                                        // A closer found while still in CDATA mode is not
                                        // fired; it falls through and is saved as raw data.
                                        if ( !cdata )
                                        {
                                                this.onTagClose( tagName );
                                                continue;
                                        }
                                }

                                // If CDATA is enabled, just save the raw match.
                                if ( cdata )
                                {
                                        cdata.push( parts[ 0 ] );
                                        continue;
                                }

                                // Opening tag
                                if ( ( tagName = parts[ 3 ] ) )
                                {
                                        tagName = tagName.toLowerCase();

                                        // There are some tag names that can break things, so let's
                                        // simply ignore them when parsing. (#5224)
                                        if ( /="/.test( tagName ) )
                                                continue;

                                        var attribs = {},
                                                attribMatch,
                                                attribsPart = parts[ 4 ],
                                                // The tag is self-closing when its attributes part
                                                // ends with a slash.
                                                selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) === '/' );

                                        if ( attribsPart )
                                        {
                                                // This regex is global and shared by all parser
                                                // instances, so always restart it.
                                                attribsRegex.lastIndex = 0;

                                                while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
                                                {
                                                        var attName = attribMatch[ 1 ].toLowerCase(),
                                                                attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

                                                        if ( !attValue && hasOwn.call( emptyAttribs, attName ) )
                                                                attribs[ attName ] = attName;
                                                        else
                                                                attribs[ attName ] = attValue;
                                                }
                                        }

                                        this.onTagOpen( tagName, attribs, selfClosing );

                                        // Open CDATA mode when finding the appropriate tags.
                                        if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
                                                cdata = [];

                                        continue;
                                }

                                // Comment. The match is neither a closing nor an opening
                                // tag at this point, so it must be a comment. Its text
                                // may be empty ("<!---->"), which must still be notified.
                                this.onComment( parts[ 2 ] || '' );
                        }

                        var remaining = html.length > nextIndex ? html.substring( nextIndex, html.length ) : '';

                        if ( cdata )
                        {
                                // The input ended inside a CDATA section that was never
                                // closed. Send whatever has been collected as CDATA, instead
                                // of dropping it and treating the tail as regular text.
                                if ( remaining )
                                        cdata.push( remaining );

                                if ( cdata.length )
                                        this.onCDATA( cdata.join('') );
                        }
                        else if ( remaining )
                                this.onText( remaining );
                }
        };
})();