JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.1
[ckeditor.git] / _source / core / htmlparser.js
1 /*\r
2 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * HTML text parser. Creates a parser whose callbacks (onTagOpen, onTagClose,
 * onText, onCDATA and onComment) are meant to be overridden before calling
 * parse().
 * @constructor
 * @example
 * var parser = new CKEDITOR.htmlParser();
 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 *     {
 *         alert( tagName );  // e.g. "b"
 *     };
 * parser.parse( "&lt;b&gt;Hello&lt;/b&gt;" );
 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		// Tokenizer used by parse(). It matches, in this order:
		//   - closing tags    "</name>"        (group 1: tag name)
		//   - comments        "<!-- ... -->"   (group 2: comment text)
		//   - opening tags    "<name attrs>"   (group 3: tag name,
		//                                       group 4: raw attributes part)
		//
		// Notes on the pattern:
		//   - The tag name excludes "/", so "<br/>" yields the name "br" with
		//     "/" as its attributes part (which parse() reads as self-closing)
		//     instead of the bogus name "br/".
		//   - The attributes part consumes one unquoted character at a time
		//     (no nested quantifier), so an unterminated tag cannot trigger
		//     exponential backtracking.
		//
		// The regex is created per instance (not shared) because it is used
		// with the stateful "g" flag.
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S\\s]*?)-->)|(?:([^\\s>\\/]+)\\s*((?:(?:"[^"]*")|(?:\'[^\']*\')|[^"\'>])*)\\/?>))', 'g' )
	};
};
18 \r
(function()
{
	// Matches one attribute per iteration inside the attributes part of an
	// opening tag. Groups: 1 = name, 2 = double-quoted value,
	// 3 = single-quoted value, 4 = unquoted value. An attribute without value
	// is accepted only when followed by whitespace or the end of the string.
	var attribsRegex	= /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
		// Attributes that, when found without a value, receive their own name
		// as value (e.g. checked => checked="checked").
		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name and its
		 *		value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 *		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagOpen	: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagClose	: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *     {
		 *         alert( text );  // e.g. "Hello"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onText		: function() {},

		/**
		 * Function to be fired when a CDATA section is found, that is, the raw
		 * contents of an element listed in CKEDITOR.dtd.$cdata. This function
		 * should be overridden when using this class.
		 * @param {String} cdata The CDATA that has been found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onCDATA = function( cdata )
		 *     {
		 *         alert( cdata );  // e.g. "var hello;"
		 *     };
		 * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );
		 */
		onCDATA		: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *     {
		 *         alert( comment );  // e.g. " Example "
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onComment	: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0,
				cdata;	// The collected data inside a CDATA section.

			for ( ;; )
			{
				// The regex is stateful ("g" flag). Always resume from our own
				// position, so a previous parse() aborted by a throwing
				// callback, or a callback re-using this parser, cannot make
				// the scan start at a stale offset.
				partsRegex.lastIndex = nextIndex;

				if ( !( parts = partsRegex.exec( html ) ) )
					break;

				// Text sitting between the previous token and this one.
				var tagIndex = parts.index;
				if ( tagIndex > nextIndex )
				{
					var text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match for opening/closing tags and comments.
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				 */

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					// Whitespace is allowed before the ">" of a closing tag
					// (e.g. "</script >"); strip it so the name can be
					// recognized below.
					tagName = tagName.replace( /\s+$/, '' ).toLowerCase();

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
					{
						// Send the CDATA data.
						this.onCDATA( cdata.join('') );
						cdata = null;
					}

					// While in CDATA mode, any other closing tag is just raw
					// data and is handled by the CDATA check below.
					if ( !cdata )
					{
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata )
				{
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					tagName = tagName.toLowerCase();
					var attribs = {},
						attribMatch,
						attribsPart = parts[ 4 ],
						// A trailing "/" ends up at the end of the attributes part.
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					if ( attribsPart )
					{
						// attribsRegex is shared by all parser instances and
						// is stateful, so make sure it starts from the beginning.
						attribsRegex.lastIndex = 0;

						while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
						{
							var attName = attribMatch[1].toLowerCase(),
								attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';

							// Valueless boolean attributes get their own name as value.
							if ( !attValue && emptyAttribs[ attName ] )
								attribs[ attName ] = attName;
							else
								attribs[ attName ] = attValue;
						}
					}

					this.onTagOpen( tagName, attribs, selfClosing );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment
				if( ( tagName = parts[ 2 ] ) )
					this.onComment( tagName );
			}

			if ( cdata )
			{
				// The input ended inside a CDATA element (its closing tag is
				// missing). Flush everything collected so far, plus the
				// remaining text, instead of silently dropping it.
				if ( html.length > nextIndex )
					cdata.push( html.substring( nextIndex, html.length ) );

				this.onCDATA( cdata.join('') );
			}
			else if ( html.length > nextIndex )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
})();