JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
01be3582ab96f229c1c50b9c6ae87474fe07de2e
[ckeditor.git] / _source / plugins / htmldataprocessor / plugin.js
1 /*\r
2 Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
6 (function()\r
7 {\r
8         // Regex to scan for   at the end of blocks, which are actually placeholders.\r
9         // Safari transforms the   to \xa0. (#4172)\r
10         var tailNbspRegex = /^[\t\r\n ]*(?: |\xa0)$/;\r
11 \r
12         var protectedSourceMarker = '{cke_protected}';\r
13 \r
14 \r
15         // Return the last non-space child node of the block (#4344).\r
16         function lastNoneSpaceChild( block )\r
17         {\r
18                 var lastIndex = block.children.length,\r
19                         last = block.children[ lastIndex - 1 ];\r
20                 while(  last && last.type == CKEDITOR.NODE_TEXT && !CKEDITOR.tools.trim( last.value ) )\r
21                         last = block.children[ --lastIndex ];\r
22                 return last;\r
23         }\r
24 \r
25         function trimFillers( block, fromSource )\r
26         {\r
27                 // If the current node is a block, and if we're converting from source or\r
28                 // we're not in IE then search for and remove any tailing BR node.\r
29                 //\r
30                 // Also, any   at the end of blocks are fillers, remove them as well.\r
31                 // (#2886)\r
32                 var children = block.children, lastChild = lastNoneSpaceChild( block );\r
33                 if ( lastChild )\r
34                 {\r
35                         if ( ( fromSource || !CKEDITOR.env.ie ) && lastChild.type == CKEDITOR.NODE_ELEMENT && lastChild.name == 'br' )\r
36                                 children.pop();\r
37                         if ( lastChild.type == CKEDITOR.NODE_TEXT && tailNbspRegex.test( lastChild.value ) )\r
38                                 children.pop();\r
39                 }\r
40         }\r
41 \r
42         function blockNeedsExtension( block )\r
43         {\r
44                 var lastChild = lastNoneSpaceChild( block );\r
45                 return !lastChild || lastChild.type == CKEDITOR.NODE_ELEMENT && lastChild.name == 'br';\r
46         }\r
47 \r
48         function extendBlockForDisplay( block )\r
49         {\r
50                 trimFillers( block, true );\r
51 \r
52                 if ( blockNeedsExtension( block ) )\r
53                 {\r
54                         if ( CKEDITOR.env.ie )\r
55                                 block.add( new CKEDITOR.htmlParser.text( '\xa0' ) );\r
56                         else\r
57                                 block.add( new CKEDITOR.htmlParser.element( 'br', {} ) );\r
58                 }\r
59         }\r
60 \r
61         function extendBlockForOutput( block )\r
62         {\r
63                 trimFillers( block );\r
64 \r
65                 if ( blockNeedsExtension( block ) )\r
66                         block.add( new CKEDITOR.htmlParser.text( '\xa0' ) );\r
67         }\r
68 \r
69         var dtd = CKEDITOR.dtd;\r
70 \r
71         // Find out the list of block-like tags that can contain <br>.\r
72         var blockLikeTags = CKEDITOR.tools.extend( {}, dtd.$block, dtd.$listItem, dtd.$tableContent );\r
73         for ( var i in blockLikeTags )\r
74         {\r
75                 if ( ! ( 'br' in dtd[i] ) )\r
76                         delete blockLikeTags[i];\r
77         }\r
78         // We just avoid filler in <pre> right now.\r
79         // TODO: Support filler for <pre>, line break is also occupy line height.\r
80         delete blockLikeTags.pre;\r
81         var defaultDataFilterRules =\r
82         {\r
83                 attributeNames :\r
84                 [\r
85                         // Event attributes (onXYZ) must not be directly set. They can become\r
86                         // active in the editing area (IE|WebKit).\r
87                         [ ( /^on/ ), '_cke_pa_on' ]\r
88                 ]\r
89         };\r
90 \r
91         var defaultDataBlockFilterRules = { elements : {} };\r
92 \r
93         for ( i in blockLikeTags )\r
94                 defaultDataBlockFilterRules.elements[ i ] = extendBlockForDisplay;\r
95 \r
96         var defaultHtmlFilterRules =\r
97                 {\r
98                         elementNames :\r
99                         [\r
100                                 // Remove the "cke:" namespace prefix.\r
101                                 [ ( /^cke:/ ), '' ],\r
102 \r
103                                 // Ignore <?xml:namespace> tags.\r
104                                 [ ( /^\?xml:namespace$/ ), '' ]\r
105                         ],\r
106 \r
107                         attributeNames :\r
108                         [\r
109                                 // Attributes saved for changes and protected attributes.\r
110                                 [ ( /^_cke_(saved|pa)_/ ), '' ],\r
111 \r
112                                 // All "_cke" attributes are to be ignored.\r
113                                 [ ( /^_cke.*/ ), '' ]\r
114                         ],\r
115 \r
116                         elements :\r
117                         {\r
118                                 $ : function( element )\r
119                                 {\r
120                                         // Remove duplicated attributes - #3789.\r
121                                         var attribs = element.attributes;\r
122 \r
123                                         if ( attribs )\r
124                                         {\r
125                                                 var attributeNames = [ 'name', 'href', 'src' ],\r
126                                                         savedAttributeName;\r
127                                                 for ( var i = 0 ; i < attributeNames.length ; i++ )\r
128                                                 {\r
129                                                         savedAttributeName = '_cke_saved_' + attributeNames[ i ];\r
130                                                         savedAttributeName in attribs && ( delete attribs[ attributeNames[ i ] ] );\r
131                                                 }\r
132                                         }\r
133                                 },\r
134 \r
135                                 embed : function( element )\r
136                                 {\r
137                                         var parent = element.parent;\r
138 \r
139                                         // If the <embed> is child of a <object>, copy the width\r
140                                         // and height attributes from it.\r
141                                         if ( parent && parent.name == 'object' )\r
142                                         {\r
143                                                 var parentWidth = parent.attributes.width,\r
144                                                         parentHeight = parent.attributes.height;\r
145                                                 parentWidth && ( element.attributes.width = parentWidth );\r
146                                                 parentHeight && ( element.attributes.height = parentHeight );\r
147                                         }\r
148                                 },\r
149                                 // Restore param elements into self-closing.\r
150                                 param : function( param )\r
151                                 {\r
152                                         param.children = [];\r
153                                         param.isEmpty = true;\r
154                                         return param;\r
155                                 },\r
156 \r
157                                 // Remove empty link but not empty anchor.(#3829)\r
158                                 a : function( element )\r
159                                 {\r
160                                         if ( !( element.children.length ||\r
161                                                         element.attributes.name ||\r
162                                                         element.attributes._cke_saved_name ) )\r
163                                         {\r
164                                                 return false;\r
165                                         }\r
166                                 }\r
167                         },\r
168 \r
169                         attributes :\r
170                         {\r
171                                 'class' : function( value, element )\r
172                                 {\r
173                                         // Remove all class names starting with "cke_".\r
174                                         return CKEDITOR.tools.ltrim( value.replace( /(?:^|\s+)cke_[^\s]*/g, '' ) ) || false;\r
175                                 }\r
176                         },\r
177 \r
178                         comment : function( contents )\r
179                         {\r
180                                 if ( contents.substr( 0, protectedSourceMarker.length ) == protectedSourceMarker )\r
181                                         return new CKEDITOR.htmlParser.cdata( decodeURIComponent( contents.substr( protectedSourceMarker.length ) ) );\r
182 \r
183                                 return contents;\r
184                         }\r
185                 };\r
186 \r
187         var defaultHtmlBlockFilterRules = { elements : {} };\r
188 \r
189         for ( i in blockLikeTags )\r
190                 defaultHtmlBlockFilterRules.elements[ i ] = extendBlockForOutput;\r
191 \r
192         if ( CKEDITOR.env.ie )\r
193         {\r
194                 // IE outputs style attribute in capital letters. We should convert\r
195                 // them back to lower case.\r
196                 defaultHtmlFilterRules.attributes.style = function( value, element )\r
197                 {\r
198                         return value.toLowerCase();\r
199                 };\r
200         }\r
201 \r
202         var protectAttributeRegex = /<(?:a|area|img|input)[\s\S]*?\s((?:href|src|name)\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|(?:[^ "'>]+)))/gi;\r
203 \r
204         function protectAttributes( html )\r
205         {\r
206                 return html.replace( protectAttributeRegex, '$& _cke_saved_$1' );\r
207         }\r
208 \r
209         var protectStyleTagsRegex = /<(style)(?=[ >])[^>]*>[^<]*<\/\1>/gi;\r
210         var encodedTagsRegex = /<cke:encoded>([^<]*)<\/cke:encoded>/gi;\r
211         var protectElementNamesRegex = /(<\/?)((?:object|embed|param)[\s\S]*?>)/gi;\r
212         var protectSelfClosingRegex = /<cke:(param|embed)([\s\S]*?)\/?>/gi;\r
213 \r
214         function protectStyleTagsMatch( match )\r
215         {\r
216                 return '<cke:encoded>' + encodeURIComponent( match ) + '</cke:encoded>';\r
217         }\r
218 \r
219         function protectStyleTags( html )\r
220         {\r
221                 return html.replace( protectStyleTagsRegex, protectStyleTagsMatch );\r
222         }\r
223         function protectElementsNames( html )\r
224         {\r
225                 return html.replace( protectElementNamesRegex, '$1cke:$2');\r
226         }\r
227         function protectSelfClosingElements( html )\r
228         {\r
229                 return html.replace( protectSelfClosingRegex, '<cke:$1$2></cke:$1>' );\r
230         }\r
231 \r
232         function unprotectEncodedTagsMatch( match, encoded )\r
233         {\r
234                 return decodeURIComponent( encoded );\r
235         }\r
236 \r
237         function unprotectEncodedTags( html )\r
238         {\r
239                 return html.replace( encodedTagsRegex, unprotectEncodedTagsMatch );\r
240         }\r
241 \r
242         function protectSource( data, protectRegexes )\r
243         {\r
244                 var protectedHtml = [],\r
245                         tempRegex = /<\!--\{cke_temp\}(\d*?)-->/g;\r
246                 var regexes =\r
247                         [\r
248                                 // First of any other protection, we must protect all comments\r
249                                 // to avoid loosing them (of course, IE related).\r
250                                 (/<!--[\s\S]*?-->/g),\r
251 \r
252                                 // Script tags will also be forced to be protected, otherwise\r
253                                 // IE will execute them.\r
254                                 /<script[\s\S]*?<\/script>/gi,\r
255 \r
256                                 // <noscript> tags (get lost in IE and messed up in FF).\r
257                                 /<noscript[\s\S]*?<\/noscript>/gi\r
258                         ]\r
259                         .concat( protectRegexes );\r
260 \r
261                 for ( var i = 0 ; i < regexes.length ; i++ )\r
262                 {\r
263                         data = data.replace( regexes[i], function( match )\r
264                                 {\r
265                                         match = match.replace( tempRegex,               // There could be protected source inside another one. (#3869).\r
266                                                 function( $, id )\r
267                                                 {\r
268                                                         return protectedHtml[ id ];\r
269                                                 }\r
270                                         );\r
271                                         return  '<!--{cke_temp}' + ( protectedHtml.push( match ) - 1 ) + '-->';\r
272                                 });\r
273                 }\r
274                 data = data.replace( tempRegex, function( $, id )\r
275                         {\r
276                                 return '<!--' + protectedSourceMarker +\r
277                                                 encodeURIComponent( protectedHtml[ id ] ).replace( /--/g, '%2D%2D' ) +\r
278                                                 '-->';\r
279                         }\r
280                 );\r
281                 return data;\r
282         }\r
283 \r
284         CKEDITOR.plugins.add( 'htmldataprocessor',\r
285         {\r
286                 requires : [ 'htmlwriter' ],\r
287 \r
288                 init : function( editor )\r
289                 {\r
290                         var dataProcessor = editor.dataProcessor = new CKEDITOR.htmlDataProcessor( editor );\r
291 \r
292                         dataProcessor.writer.forceSimpleAmpersand = editor.config.forceSimpleAmpersand;\r
293 \r
294                         dataProcessor.dataFilter.addRules( defaultDataFilterRules );\r
295                         dataProcessor.dataFilter.addRules( defaultDataBlockFilterRules );\r
296                         dataProcessor.htmlFilter.addRules( defaultHtmlFilterRules );\r
297                         dataProcessor.htmlFilter.addRules( defaultHtmlBlockFilterRules );\r
298                 }\r
299         });\r
300 \r
301         CKEDITOR.htmlDataProcessor = function( editor )\r
302         {\r
303                 this.editor = editor;\r
304 \r
305                 this.writer = new CKEDITOR.htmlWriter();\r
306                 this.dataFilter = new CKEDITOR.htmlParser.filter();\r
307                 this.htmlFilter = new CKEDITOR.htmlParser.filter();\r
308         };\r
309 \r
310         CKEDITOR.htmlDataProcessor.prototype =\r
311         {\r
312                 toHtml : function( data, fixForBody )\r
313                 {\r
314                         // The source data is already HTML, but we need to clean\r
315                         // it up and apply the filter.\r
316 \r
317                         data = protectSource( data, this.editor.config.protectedSource );\r
318 \r
319                         // Before anything, we must protect the URL attributes as the\r
320                         // browser may changing them when setting the innerHTML later in\r
321                         // the code.\r
322                         data = protectAttributes( data );\r
323 \r
324                         // IE remvoes style tags from innerHTML. (#3710).\r
325                         if ( CKEDITOR.env.ie )\r
326                                 data = protectStyleTags( data );\r
327 \r
328                         // Certain elements has problem to go through DOM operation, protect\r
329                         // them by prefixing 'cke' namespace.(#3591)\r
330                         data = protectElementsNames( data );\r
331 \r
332                         // All none-IE browsers ignore self-closed custom elements,\r
333                         // protecting them into open-close.(#3591)\r
334                         data = protectSelfClosingElements( data );\r
335 \r
336                         // Call the browser to help us fixing a possibly invalid HTML\r
337                         // structure.\r
338                         var div = document.createElement( 'div' );\r
339                         // Add fake character to workaround IE comments bug. (#3801)\r
340                         div.innerHTML = 'a' + data;\r
341                         data = div.innerHTML.substr( 1 );\r
342 \r
343                         if ( CKEDITOR.env.ie )\r
344                                 data = unprotectEncodedTags( data );\r
345 \r
346                         // Now use our parser to make further fixes to the structure, as\r
347                         // well as apply the filter.\r
348                         var fragment = CKEDITOR.htmlParser.fragment.fromHtml( data, fixForBody ),\r
349                                 writer = new CKEDITOR.htmlParser.basicWriter();\r
350 \r
351                         fragment.writeHtml( writer, this.dataFilter );\r
352 \r
353                         return writer.getHtml( true );\r
354                 },\r
355 \r
356                 toDataFormat : function( html, fixForBody )\r
357                 {\r
358                         var writer = this.writer,\r
359                                 fragment = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody );\r
360 \r
361                         writer.reset();\r
362 \r
363                         fragment.writeHtml( writer, this.htmlFilter );\r
364 \r
365                         return writer.getHtml( true );\r
366                 }\r
367         };\r
368 })();\r
369 \r
/**
 * Whether to force the use of "&" instead of "&amp;amp;" in the values of
 * element attributes. Changing this setting is not recommended, for the sake
 * of compliance with the W3C XHTML 1.0 standards
 * (<a href="http://www.w3.org/TR/xhtml1/#C_12">C.12, XHTML 1.0</a>).
 * @type Boolean
 * @default false
 * @example
 * config.forceSimpleAmpersand = false;
 */
CKEDITOR.config.forceSimpleAmpersand = false;