JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
d783d6652b9cc19811c35340a3ab75462255db45
[ckeditor.git] / _source / plugins / htmldataprocessor / plugin.js
1 /*\r
2 Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
6 (function()\r
7 {\r
// Regex to scan for &nbsp; at the end of blocks, which are actually placeholders.
// The filler is matched either as the literal entity text "&nbsp;" or as the
// \xa0 character that Safari transforms it to (#4172). Only tabs, line breaks
// and plain spaces may precede it, so ordinary text is never taken for a filler.
var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)$/;

// Prefix identifying a protected-source comment: protectSource() writes it in
// front of the encoded content and the htmlFilter "comment" rule looks for it.
var protectedSourceMarker = '{cke_protected}';
13 \r
// Drops the filler node sitting at the very end of a block, if there is one.
//
// A trailing <br> is removed when converting from source (fromSource is
// truthy) or when the browser is not IE. A trailing text node holding only
// an &nbsp; filler (see tailNbspRegex) is always removed. (#2886)
//
// Only the node that was last when the function was called is inspected.
function trimFillers( block, fromSource )
{
    var nodes = block.children,
        tail = nodes[ nodes.length - 1 ];

    // Empty block: nothing to trim.
    if ( !tail )
        return;

    var isTailBr = tail.type == CKEDITOR.NODE_ELEMENT && tail.name == 'br',
        isTailNbsp = tail.type == CKEDITOR.NODE_TEXT && tailNbspRegex.test( tail.value );

    if ( isTailBr && ( fromSource || !CKEDITOR.env.ie ) )
        nodes.pop();

    if ( isTailNbsp )
        nodes.pop();
}
31 \r
// Tells whether a block has to receive a filler: true when the block has no
// children at all, or when its last child is a <br> element.
function blockNeedsExtension( block )
{
    var nodes = block.children,
        count = nodes.length;

    if ( count < 1 )
        return true;

    var tail = nodes[ count - 1 ];

    if ( tail.type != CKEDITOR.NODE_ELEMENT )
        return false;

    return tail.name == 'br';
}
40 \r
// Data filter rule for block-like elements: strips any existing filler
// (treating the content as coming from source) and, when the block still
// needs one, appends a filler for display in the editing area: a \xa0 text
// node in IE, a <br> element in every other browser.
function extendBlockForDisplay( block )
{
    trimFillers( block, true );

    if ( !blockNeedsExtension( block ) )
        return;

    var filler = CKEDITOR.env.ie ?
            new CKEDITOR.htmlParser.text( '\xa0' ) :
            new CKEDITOR.htmlParser.element( 'br', {} );

    block.add( filler );
}
53 \r
// Html filter rule for block-like elements: strips any existing filler and,
// when the block still needs one, appends a \xa0 text node so the block is
// not output empty.
function extendBlockForOutput( block )
{
    trimFillers( block );

    if ( !blockNeedsExtension( block ) )
        return;

    var filler = new CKEDITOR.htmlParser.text( '\xa0' );
    block.add( filler );
}
61 \r
// Shortcut to the DTD tables.
var dtd = CKEDITOR.dtd;

// Build the set of block-like tags that can contain <br>: start from the
// block, list item and table content tags of the DTD...
var blockLikeTags = CKEDITOR.tools.extend( {}, dtd.$block, dtd.$listItem, dtd.$tableContent );

// ...and throw away every tag whose DTD entry does not list "br".
// NOTE: "i" is reused by the loops that later fill the block filter rules.
for ( var i in blockLikeTags )
{
    if ( 'br' in dtd[ i ] )
        continue;

    delete blockLikeTags[ i ];
}

// We just avoid filler in <pre> right now.
// TODO: Support filler for <pre>, as a line break there also occupies line height.
delete blockLikeTags.pre;
// Rules registered on the data filter by the plugin's init().
//
// Event attributes (onXYZ) must not be directly set, as they can become
// active in the editing area (IE|WebKit). The single rule pairs a regex that
// matches the leading "on" of an attribute name with the "_cke_pa_on"
// replacement (presumably applied by the filter as a name replacement -
// verify against CKEDITOR.htmlParser.filter).
var defaultDataFilterRules =
{
    attributeNames : [ [ /^on/, '_cke_pa_on' ] ]
};
83 \r
// Data filter rules for block-like elements: every tag kept in blockLikeTags
// is handled by extendBlockForDisplay. The loop variable "i" is the one
// declared by the loop that builds blockLikeTags.
var defaultDataBlockFilterRules = {};
defaultDataBlockFilterRules.elements = {};

for ( i in blockLikeTags )
{
    defaultDataBlockFilterRules.elements[ i ] = extendBlockForDisplay;
}
88 \r
// Rules registered on the html filter by the plugin's init(). They undo the
// editor-only markup before the data is output.
var defaultHtmlFilterRules =
    {
        // [ regex, replacement ] pairs for element names.
        elementNames :
        [
            // Strip the "cke:" namespace prefix.
            [ ( /^cke:/ ), '' ],

            // <?xml:namespace> tags get an empty name.
            [ ( /^\?xml:namespace$/ ), '' ]
        ],

        // [ regex, replacement ] pairs for attribute names.
        attributeNames :
        [
            // Saved (_cke_saved_) and protected (_cke_pa_) attributes get
            // their real name back.
            [ ( /^_cke_(saved|pa)_/ ), '' ],

            // Any other "_cke" attribute gets an empty name.
            [ ( /^_cke.*/ ), '' ]
        ],

        elements :
        {
            // Remove duplicated attributes - #3789: when the saved copy of
            // name/href/src exists, the plain attribute is deleted.
            $ : function( element )
            {
                var attribs = element.attributes;

                if ( !attribs )
                    return;

                var duplicated = [ 'name', 'href', 'src' ];

                for ( var index = 0 ; index < duplicated.length ; index++ )
                {
                    var plainName = duplicated[ index ];

                    if ( ( '_cke_saved_' + plainName ) in attribs )
                        delete attribs[ plainName ];
                }
            },

            // An <embed> placed directly inside an <object> inherits the
            // non-empty width and height attributes of that <object>.
            embed : function( element )
            {
                var owner = element.parent;

                if ( !owner || owner.name != 'object' )
                    return;

                var ownerWidth = owner.attributes.width,
                    ownerHeight = owner.attributes.height;

                if ( ownerWidth )
                    element.attributes.width = ownerWidth;

                if ( ownerHeight )
                    element.attributes.height = ownerHeight;
            },

            // Restore param elements into self-closing.
            param : function( param )
            {
                param.isEmpty = true;
                param.children = [];
                return param;
            },

            // Remove empty link but not empty anchor (#3829): returns false
            // for an <a> with no children and neither a name nor a saved name.
            a : function( element )
            {
                var attribs = element.attributes;

                if ( !element.children.length && !attribs.name && !attribs._cke_saved_name )
                    return false;
            }
        },

        attributes :
        {
            // Remove all class names starting with "cke_"; returns false when
            // nothing is left after trimming the leading whitespace.
            'class' : function( value, element )
            {
                var cleaned = value.replace( /(?:^|\s+)cke_[^\s]*/g, '' );

                return CKEDITOR.tools.ltrim( cleaned ) || false;
            }
        },

        // Comments starting with the protected source marker are turned back
        // into a cdata node holding the decoded original source; any other
        // comment is returned unchanged.
        comment : function( contents )
        {
            var markerLength = protectedSourceMarker.length;

            if ( contents.slice( 0, markerLength ) != protectedSourceMarker )
                return contents;

            return new CKEDITOR.htmlParser.cdata( decodeURIComponent( contents.substr( markerLength ) ) );
        }
    };
179 \r
// Html filter rules for block-like elements: every tag kept in blockLikeTags
// is handled by extendBlockForOutput. The loop variable "i" is the one
// declared by the loop that builds blockLikeTags.
var defaultHtmlBlockFilterRules = {};
defaultHtmlBlockFilterRules.elements = {};

for ( i in blockLikeTags )
{
    defaultHtmlBlockFilterRules.elements[ i ] = extendBlockForOutput;
}
184 \r
if ( CKEDITOR.env.ie )
{
    // IE outputs style attribute in capital letters. We should convert
    // them back to lower case.
    //
    // Only the property name of each declaration is converted, i.e. the text
    // between the start of the value (or a ";") and the next ":". The
    // declaration values are left alone because they can hold case-sensitive
    // data such as URLs and font names, which a blanket toLowerCase() on the
    // whole attribute would corrupt.
    defaultHtmlFilterRules.attributes.style = function( value, element )
    {
        return value.replace( /(^|;)([^:;]+)(?=:)/g, function( propertyName )
            {
                return propertyName.toLowerCase();
            });
    };
}
194 \r
// Start tag of an element whose attributes must be protected. $1 is the tag
// name and $2 the raw attribute list. The lookahead keeps longer tag names
// that merely start with one of these (e.g. <abbr>, <address>) from matching,
// and quoted values are consumed as a whole so the match never runs past the
// end of the tag.
var protectElementRegex = /<(a|area|img|input)(?=[\s\/>])((?:"[^"]*"|'[^']*'|[^"'>])*)>/gi;

// A single attribute inside a start tag. $1 is the attribute name and $2 the
// "= value" part, which is missing for valueless attributes. Matching whole
// attributes one after the other means text inside another attribute's value
// is never taken for an attribute.
var protectAttributeRegex = /\s+([^\s"'=\/>]+)(\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+))?/g;

// Names of the attributes that get a "_cke_saved_" copy.
var protectedAttributeNameRegex = /^(?:href|src|name)$/i;

// Duplicates every href, src and name attribute of <a>, <area>, <img> and
// <input> start tags as a "_cke_saved_" attribute placed right after the
// original one, e.g. <a href="x"> becomes <a href="x" _cke_saved_href="x">.
// All matching attributes of a tag are protected, not just the first one.
//
// @param {String} html The HTML to be processed.
// @returns {String} The HTML with the saved attributes added.
function protectAttributes( html )
{
    return html.replace( protectElementRegex, function( element, tagName, attributes )
        {
            var protectedAttributes = attributes.replace( protectAttributeRegex, function( attribute, name, assignment )
                {
                    // Valueless attributes have nothing to save.
                    if ( assignment && protectedAttributeNameRegex.test( name ) )
                        return attribute + ' _cke_saved_' + name + assignment;

                    return attribute;
                });

            return '<' + tagName + protectedAttributes + '>';
        });
}
201 \r
var
    // A complete <style> element whose content holds no "<".
    protectStyleTagsRegex = /<(style)(?=[ >])[^>]*>[^<]*<\/\1>/gi,

    // The <cke:encoded> wrapper written by protectStyleTagsMatch(); $1 is
    // the encoded content.
    encodedTagsRegex = /<cke:encoded>([^<]*)<\/cke:encoded>/gi,

    // Opening and closing tags of object, embed and param elements: $1 is
    // "<" or "</", $2 the rest of the tag.
    protectElementNamesRegex = /(<\/?)((?:object|embed|param).*?>)/gi,

    // Self-closed <cke:param .../> tags; $1 is the attribute text.
    protectSelfClosingRegex = /<cke:param(.*?)\/>/gi;
206 \r
// Replacement callback for protectStyleTagsRegex: wraps the URI-encoded
// matched text in a <cke:encoded> element.
function protectStyleTagsMatch( match )
{
    var pieces = [ '<cke:encoded>', encodeURIComponent( match ), '</cke:encoded>' ];

    return pieces.join( '' );
}
211 \r
// Replaces every match of protectStyleTagsRegex in the given HTML with the
// result of protectStyleTagsMatch().
function protectStyleTags( html )
{
    var protectedHtml = html.replace( protectStyleTagsRegex, protectStyleTagsMatch );

    return protectedHtml;
}
// Inserts the "cke:" prefix in front of the tag name of every tag matched by
// protectElementNamesRegex (object, embed and param, opening or closing).
function protectElementsNames( html )
{
    var prefixedHtml = html.replace( protectElementNamesRegex, '$1cke:$2' );

    return prefixedHtml;
}
// Rewrites every self-closed tag matched by protectSelfClosingRegex into an
// explicit <cke:param ...></cke:param> open-close pair, keeping its attributes.
function protectSelfClosingElements( html )
{
    var expandedHtml = html.replace( protectSelfClosingRegex, '<cke:param$1></cke:param>' );

    return expandedHtml;
}
224 \r
// Replacement callback for encodedTagsRegex: gives back the URI-decoded
// content of the wrapper (the second argument); the full match is not used.
function unprotectEncodedTagsMatch( match, encoded )
{
    var decoded = decodeURIComponent( encoded );

    return decoded;
}
229 \r
// Replaces every match of encodedTagsRegex in the given HTML with the result
// of unprotectEncodedTagsMatch().
function unprotectEncodedTags( html )
{
    var restoredHtml = html.replace( encodedTagsRegex, unprotectEncodedTagsMatch );

    return restoredHtml;
}
234 \r
// Hides the parts of the source that must not be touched by the browser or
// the parser inside comments of the form
// "<!--" + protectedSourceMarker + encoded source + "-->".
//
// Comments, <script> and <noscript> elements are always protected; anything
// matched by one of the protectRegexes is protected as well.
//
// @param {String} data The source to be protected.
// @param {Array} [protectRegexes] Additional regular expressions matching
//        source to be protected. May be omitted.
// @returns {String} The source with every protected part replaced.
function protectSource( data, protectRegexes )
{
    var protectedHtml = [],
        tempRegex = /<\!--\{cke_temp\}(\d*?)-->/g;
    var regexes =
        [
            // First of any other protection, we must protect all comments
            // to avoid losing them (of course, IE related).
            (/<!--[\s\S]*?-->/g),

            // Script tags will also be forced to be protected, otherwise
            // IE will execute them.
            /<script[\s\S]*?<\/script>/gi,

            // <noscript> tags (get lost in IE and messed up in FF).
            /<noscript[\s\S]*?<\/noscript>/gi
        ]
        // concat() would append a missing list as an "undefined" entry, and
        // replace() would then search the data for the text "undefined".
        .concat( protectRegexes || [] );

    // First pass: swap every match for a numbered temporary comment and keep
    // the original text in protectedHtml.
    for ( var i = 0 ; i < regexes.length ; i++ )
    {
        data = data.replace( regexes[i], function( match )
            {
                match = match.replace( tempRegex,               // There could be protected source inside another one. (#3869).
                    function( $, id )
                    {
                        return protectedHtml[ id ];
                    }
                );
                return  '<!--{cke_temp}' + ( protectedHtml.push( match ) - 1 ) + '-->';
            });
    }

    // Second pass: turn the temporary comments into the final protected
    // comments. "--" is encoded too, as encodeURIComponent leaves "-" alone.
    data = data.replace( tempRegex, function( $, id )
        {
            return '<!--' + protectedSourceMarker +
                    encodeURIComponent( protectedHtml[ id ] ).replace( /--/g, '%2D%2D' ) +
                    '-->';
        }
    );
    return data;
}
276 \r
// Registers the plugin. On init, the editor receives a fresh
// CKEDITOR.htmlDataProcessor whose writer takes over the editor's
// forceSimpleAmpersand setting and whose two filters get the default rules
// defined in this file.
CKEDITOR.plugins.add( 'htmldataprocessor',
{
    requires : [ 'htmlwriter' ],

    init : function( editor )
    {
        var processor = new CKEDITOR.htmlDataProcessor( editor ),
            dataFilter = processor.dataFilter,
            htmlFilter = processor.htmlFilter;

        editor.dataProcessor = processor;

        processor.writer.forceSimpleAmpersand = editor.config.forceSimpleAmpersand;

        // Rules used by toHtml().
        dataFilter.addRules( defaultDataFilterRules );
        dataFilter.addRules( defaultDataBlockFilterRules );

        // Rules used by toDataFormat().
        htmlFilter.addRules( defaultHtmlFilterRules );
        htmlFilter.addRules( defaultHtmlBlockFilterRules );
    }
});
293 \r
// Data processor constructor. Keeps a reference to the editor and creates
// the writer used by toDataFormat() plus one filter per direction:
// dataFilter (used by toHtml()) and htmlFilter (used by toDataFormat()).
CKEDITOR.htmlDataProcessor = function( editor )
{
    var parser = CKEDITOR.htmlParser;

    this.editor = editor;

    this.writer = new CKEDITOR.htmlWriter();
    this.dataFilter = new parser.filter();
    this.htmlFilter = new parser.filter();
};
302 \r
CKEDITOR.htmlDataProcessor.prototype =
{
    // Converts source data into the HTML used inside the editor. The data is
    // already HTML; it is protected, passed through the browser, re-parsed
    // and run through dataFilter. The order of the steps matters.
    toHtml : function( data, fixForBody )
    {
        // Hide comments, scripts and the configured protected source.
        var html = protectSource( data, this.editor.config.protectedSource );

        // The URL attributes go first, as the browser may change them when
        // the innerHTML is set further down.
        html = protectAttributes( html );

        // IE removes style tags from innerHTML. (#3710).
        if ( CKEDITOR.env.ie )
            html = protectStyleTags( html );

        // Certain elements have problems going through DOM operations;
        // they are protected by a 'cke' namespace prefix. (#3591)
        html = protectElementsNames( html );

        // All non-IE browsers ignore self-closed custom elements, so these
        // are turned into open-close pairs. (#3591)
        html = protectSelfClosingElements( html );

        // Let the browser fix a possibly invalid HTML structure. The fake
        // leading character works around the IE comments bug (#3801) and is
        // cut off again right after.
        var sandbox = document.createElement( 'div' );
        sandbox.innerHTML = 'a' + html;
        html = sandbox.innerHTML.substr( 1 );

        if ( CKEDITOR.env.ie )
            html = unprotectEncodedTags( html );

        // Our own parser makes further fixes to the structure and applies
        // the filter while writing.
        var parsed = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody );
        var output = new CKEDITOR.htmlParser.basicWriter();

        parsed.writeHtml( output, this.dataFilter );

        return output.getHtml( true );
    },

    // Converts editor HTML into the output data format: the HTML is parsed
    // and written through htmlFilter with the processor's own writer, which
    // is reset first.
    toDataFormat : function( html, fixForBody )
    {
        var output = this.writer;
        var parsed = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody );

        output.reset();
        parsed.writeHtml( output, this.htmlFilter );

        return output.getHtml( true );
    }
};
361 })();\r
362 \r
/**
 * Whether to force using "&" instead of "&amp;amp;" in element attribute
 * values. It's not recommended to change this setting for compliance with the
 * W3C XHTML 1.0 standards
 * (<a href="http://www.w3.org/TR/xhtml1/#C_12">C.12, XHTML 1.0</a>).
 * The htmldataprocessor plugin copies this setting to the
 * <code>forceSimpleAmpersand</code> property of the data processor's writer
 * when it is initialized.
 * NOTE(review): "&amp;amp;" looks like the HTML-escaped spelling of
 * "&amp;" meant for the generated documentation - confirm.
 * @type Boolean
 * @default false
 * @example
 * config.forceSimpleAmpersand = false;
 */
CKEDITOR.config.forceSimpleAmpersand = false;