JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.2
[ckeditor.git] / _source / plugins / htmldataprocessor / plugin.js
1 /*\r
2 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
6 (function()\r
7 {\r
// Matches a text value that is nothing but a trailing filler: optional
// whitespace followed by either the literal "&nbsp;" entity or the \xa0
// character. Safari transforms the "&nbsp;" entity into \xa0, so both
// spellings have to be recognised. (#4172)
var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)$/;

// Prefix placed at the start of HTML comments that carry protected source.
var protectedSourceMarker = '{cke_protected}';
13 \r
// Walks the children of the block backwards and returns the first one that
// is not a whitespace-only text node (#4344). Returns undefined when the
// block has no such child.
function lastNoneSpaceChild( block )
{
    var nodes = block.children,
        index = nodes.length - 1,
        candidate = nodes[ index ];

    for ( ; candidate ; candidate = nodes[ --index ] )
    {
        var isBlankText = candidate.type == CKEDITOR.NODE_TEXT &&
            !CKEDITOR.tools.trim( candidate.value );

        if ( !isBlankText )
            break;
    }

    return candidate;
}
23 \r
// Removes the filler found at the end of a block, if any.
//
// A trailing <br> is treated as a filler when converting from source
// (fromSource is truthy) or when not running in IE. A trailing text node
// that matches tailNbspRegex ("&nbsp;" or \xa0) is always a filler. (#2886)
//
// The filler is located with lastNoneSpaceChild(), so it may be followed by
// whitespace-only text nodes. The filler node itself is removed rather than
// whatever happens to be the physical last child; otherwise the whitespace
// would be dropped and the filler left in place.
function trimFillers( block, fromSource )
{
    var children = block.children,
        lastChild = lastNoneSpaceChild( block );

    if ( !lastChild )
        return;

    var isTrailingBr = ( fromSource || !CKEDITOR.env.ie ) &&
            lastChild.type == CKEDITOR.NODE_ELEMENT &&
            lastChild.name == 'br',
        isTrailingNbsp = lastChild.type == CKEDITOR.NODE_TEXT &&
            tailNbspRegex.test( lastChild.value );

    if ( !isTrailingBr && !isTrailingNbsp )
        return;

    // Search from the end: the filler is the last non-space child, so it
    // sits at, or very close to, the end of the list.
    for ( var i = children.length - 1 ; i >= 0 ; i-- )
    {
        if ( children[ i ] === lastChild )
        {
            children.splice( i, 1 );
            break;
        }
    }
}
40 \r
// Tells whether the block needs a filler appended: true when it has no
// non-space child at all, or when its last non-space child is a <br>.
function blockNeedsExtension( block )
{
    var tail = lastNoneSpaceChild( block );

    if ( !tail )
        return true;

    return tail.type == CKEDITOR.NODE_ELEMENT && tail.name == 'br';
}
46 \r
// Prepares a block for the editing area: strips any existing filler (as if
// coming from source) and, when the block still needs one, appends a \xa0
// text node in IE or a <br> element elsewhere.
function extendBlockForDisplay( block )
{
    trimFillers( block, true );

    if ( !blockNeedsExtension( block ) )
        return;

    var filler = CKEDITOR.env.ie ?
        new CKEDITOR.htmlParser.text( '\xa0' ) :
        new CKEDITOR.htmlParser.element( 'br', {} );

    block.add( filler );
}
59 \r
// Prepares a block for output: strips any existing filler and, when the
// block still needs one, appends a \xa0 text node.
function extendBlockForOutput( block )
{
    trimFillers( block );

    blockNeedsExtension( block ) &&
        block.add( new CKEDITOR.htmlParser.text( '\xa0' ) );
}
67 \r
var dtd = CKEDITOR.dtd;

// Collect the block-like tags (blocks, list items and table content) and
// keep only those whose DTD entry allows a <br> child.
var blockLikeTags = CKEDITOR.tools.extend( {}, dtd.$block, dtd.$listItem, dtd.$tableContent );
for ( var i in blockLikeTags )
{
    if ( 'br' in dtd[ i ] )
        continue;

    delete blockLikeTags[ i ];
}

// Fillers are not handled for <pre> for now.
// TODO: Support filler for <pre>; a line break there also occupies line height.
delete blockLikeTags.pre;
// Default rules for the data filter.
var defaultDataFilterRules = {};

// Event attributes (onXYZ) must not be set directly, as they can become
// active in the editing area (IE|WebKit). Their "on" prefix is rewritten
// to "_cke_pa_on".
defaultDataFilterRules.attributeNames =
[
    [ /^on/, '_cke_pa_on' ]
];
89 \r
// Data filter rules that run extendBlockForDisplay on every block-like tag.
var defaultDataBlockFilterRules = { elements : {} };

for ( var dataBlockTag in blockLikeTags )
{
    defaultDataBlockFilterRules.elements[ dataBlockTag ] = extendBlockForDisplay;
}
94 \r
// Default rules for the HTML (output) filter. They strip the editor's
// internal markers from element names, attribute names, class values and
// comments, and normalise a handful of elements.
var defaultHtmlFilterRules =
    {
        elementNames :
        [
            // Remove the "cke:" namespace prefix.
            [ ( /^cke:/ ), '' ],

            // Ignore <?xml:namespace> tags.
            [ ( /^\?xml:namespace$/ ), '' ]
        ],

        attributeNames :
        [
            // Attributes saved for changes and protected attributes.
            [ ( /^_cke_(saved|pa)_/ ), '' ],

            // All "_cke" attributes are to be ignored.
            [ ( /^_cke.*/ ), '' ],

            [ 'hidefocus', '' ]
        ],

        elements :
        {
            // Generic rule, keyed "$".
            $ : function( element )
            {
                var attribs = element.attributes;

                if ( attribs )
                {
                    // Elements marked as temporary are to be ignored.
                    if ( attribs.cke_temp )
                        return false;

                    // Remove duplicated attributes - #3789. When a
                    // "_cke_saved_" copy exists, the plain attribute is
                    // dropped.
                    var attributeNames = [ 'name', 'href', 'src' ],
                        savedAttributeName;
                    for ( var i = 0 ; i < attributeNames.length ; i++ )
                    {
                        savedAttributeName = '_cke_saved_' + attributeNames[ i ];
                        if ( savedAttributeName in attribs )
                            delete attribs[ attributeNames[ i ] ];
                    }
                }

                return element;
            },

            embed : function( element )
            {
                var parent = element.parent;

                // If the <embed> is child of a <object>, copy the width
                // and height attributes from it.
                if ( parent && parent.name == 'object' )
                {
                    var parentWidth = parent.attributes.width,
                        parentHeight = parent.attributes.height;
                    parentWidth && ( element.attributes.width = parentWidth );
                    parentHeight && ( element.attributes.height = parentHeight );
                }
            },

            // Restore param elements into self-closing.
            param : function( param )
            {
                param.children = [];
                param.isEmpty = true;
                return param;
            },

            // Remove empty link but not empty anchor. (#3829)
            a : function( element )
            {
                if ( !( element.children.length ||
                        element.attributes.name ||
                        element.attributes._cke_saved_name ) )
                {
                    return false;
                }
            },

            body : function( element )
            {
                delete element.attributes.spellcheck;
                delete element.attributes.contenteditable;
            },

            // Trim the style sheet text and default the type attribute.
            style : function( element )
            {
                var child = element.children[ 0 ];
                child && child.value && ( child.value = CKEDITOR.tools.trim( child.value ) );

                if ( !element.attributes.type )
                    element.attributes.type = 'text/css';
            },

            // Set the title text from the "_cke_title" attribute. An empty
            // <title> has no child to update, so it is left alone instead of
            // throwing, and a missing attribute yields an empty string
            // instead of undefined.
            title : function( element )
            {
                var titleText = element.children[ 0 ];
                titleText && ( titleText.value = element.attributes[ '_cke_title' ] || '' );
            }
        },

        attributes :
        {
            'class' : function( value, element )
            {
                // Remove all class names starting with "cke_". Returns false
                // when nothing is left.
                return CKEDITOR.tools.ltrim( value.replace( /(?:^|\s+)cke_[^\s]*/g, '' ) ) || false;
            }
        },

        comment : function( contents )
        {
            // If this is a comment for protected source.
            if ( contents.substr( 0, protectedSourceMarker.length ) == protectedSourceMarker )
            {
                // Remove the extra marker for real comments from it.
                if ( contents.substr( protectedSourceMarker.length, 3 ) == '{C}' )
                    contents = contents.substr( protectedSourceMarker.length + 3 );
                else
                    contents = contents.substr( protectedSourceMarker.length );

                return new CKEDITOR.htmlParser.cdata( decodeURIComponent( contents ) );
            }

            return contents;
        }
    };
222 \r
// HTML filter rules that run extendBlockForOutput on every block-like tag.
var defaultHtmlBlockFilterRules = { elements : {} };

for ( var htmlBlockTag in blockLikeTags )
{
    defaultHtmlBlockFilterRules.elements[ htmlBlockTag ] = extendBlockForOutput;
}
227 \r
if ( CKEDITOR.env.ie )
{
    // IE outputs the style attribute in capital letters, so it is converted
    // back to lower case. Only the property names are lowercased: the
    // values are left untouched because URLs, quoted strings and font
    // names are case-sensitive and would be corrupted by lowercasing the
    // whole attribute.
    defaultHtmlFilterRules.attributes.style = function( value, element )
    {
        var output = [],
            inName = true,  // Still before the ':' of the current declaration.
            quote = '',     // The quote character currently open, if any.
            depth = 0,      // Nesting level of parentheses, e.g. url(...).
            ch;

        for ( var i = 0 ; i < value.length ; i++ )
        {
            ch = value.charAt( i );

            if ( quote )
            {
                if ( ch == quote )
                    quote = '';
            }
            else if ( ch == '"' || ch == "'" )
                quote = ch;
            else if ( ch == '(' )
                depth++;
            else if ( ch == ')' )
                depth && depth--;
            else if ( !depth )
            {
                // ':' and ';' only delimit declarations when they are
                // outside quotes and parentheses.
                if ( ch == ':' )
                    inName = false;
                else if ( ch == ';' )
                    inName = true;
            }

            output.push( inName ? ch.toLowerCase() : ch );
        }

        return output.join( '' );
    };
}
237 \r
// Matches the opening tag of an element whose URL/name attributes must be
// protected. The lookahead requires the tag name to end right there, so
// "<a" does not match "<abbr" or "<address". Quoted attribute values are
// consumed as a unit, so a ">" inside a value does not end the tag early.
var attributeHostTagRegex = /<(a|area|img|input)(?=[\s\/>])((?:"[^"]*"|'[^']*'|[^>"'])*)>/gi;

// Applied to the attribute part of one tag. The first two alternatives
// skip over quoted values, so text such as title="see href=x" is never
// mistaken for an attribute. The last one captures a whole href/src/name
// attribute that is preceded by whitespace.
var protectAttributeRegex = /"[^"]*"|'[^']*'|(\s)((href|src|name)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+))/gi;

// Matches each <style>...</style> block on its own (lazy, so markup between
// two style blocks is not swallowed), and <link>, <meta> and <base> tags.
var protectElementsRegex = /(?:<style(?=[ >])[^>]*>[\s\S]*?<\/style>)|(?:<(?:link|meta|base)[^>]*>)/gi,
    encodedElementsRegex = /<cke:encoded>([^<]*)<\/cke:encoded>/gi;

var protectElementNamesRegex = /(<\/?)((?:object|embed|param|html|body|head|title)[^>]*>)/gi,
    unprotectElementNamesRegex = /(<\/?)cke:((?:html|body|head|title)[^>]*>)/gi;

var protectSelfClosingRegex = /<cke:(param|embed)([^>]*?)\/?>(?!\s*<\/cke:\1)/gi;

// Duplicates every href, src and name attribute of <a>, <area>, <img> and
// <input> tags into a "_cke_saved_" prefixed copy placed right after the
// original attribute. All matching attributes of a tag are saved, not only
// the first one.
function protectAttributes( html )
{
    return html.replace( attributeHostTagRegex, function( tag, tagName, attributes )
        {
            var protectedAttributes = attributes.replace( protectAttributeRegex,
                function( token, space, attribute )
                {
                    // A quoted value matched on its own: leave it as is.
                    if ( !attribute )
                        return token;

                    return token + ' _cke_saved_' + attribute;
                });

            return '<' + tagName + protectedAttributes + '>';
        });
}
252 \r
// Wraps every match of protectElementsRegex in a <cke:encoded> element
// whose content is the URI-encoded original markup.
function protectElements( html )
{
    function encodeElement( markup )
    {
        return '<cke:encoded>' + encodeURIComponent( markup ) + '</cke:encoded>';
    }

    return html.replace( protectElementsRegex, encodeElement );
}
260 \r
// Replaces every <cke:encoded> wrapper with the URI-decoded markup that it
// carries.
function unprotectElements( html )
{
    var decodeWrapper = function( wrapper, payload )
    {
        return decodeURIComponent( payload );
    };

    return html.replace( encodedElementsRegex, decodeWrapper );
}
268 \r
// Prefixes the tag names matched by protectElementNamesRegex with the
// "cke:" namespace, in both opening and closing tags.
function protectElementsNames( html )
{
    return html.replace( protectElementNamesRegex, function( tag, opener, rest )
        {
            return opener + 'cke:' + rest;
        });
}
273 \r
// Strips the "cke:" namespace prefix from the tags matched by
// unprotectElementNamesRegex.
function unprotectElementNames( html )
{
    return html.replace( unprotectElementNamesRegex, function( tag, opener, rest )
        {
            return opener + rest;
        });
}
278 \r
// Expands the tags matched by protectSelfClosingRegex into an explicit
// open/close pair.
function protectSelfClosingElements( html )
{
    return html.replace( protectSelfClosingRegex, function( tag, name, attributes )
        {
            return '<cke:' + name + attributes + '></cke:' + name + '>';
        });
}
283 \r
// Turns every HTML comment that is not already a protected-source comment
// into one: the whole original comment is URI-encoded and prefixed with the
// protected source marker plus "{C}". Any "--" left in the encoded text is
// escaped as "%2D%2D".
//
// The comment body is matched with "*?" so that an empty comment
// ("<!---->") is matched on its own; requiring at least one character
// would make the match run on to the end of the next comment and swallow
// the markup in between.
function protectRealComments( html )
{
    return html.replace( /<!--(?!\{cke_protected\})[\s\S]*?-->/g, function( match )
        {
            return '<!--' + protectedSourceMarker +
                    '{C}' +
                    encodeURIComponent( match ).replace( /--/g, '%2D%2D' ) +
                    '-->';
        });
}
294 \r
// Restores the comments that carry the protected source marker followed by
// "{C}": each one is replaced by its URI-decoded payload.
function unprotectRealComments( html )
{
    var protectedCommentRegex = /<!--\{cke_protected\}\{C\}([\s\S]+?)-->/g;

    return html.replace( protectedCommentRegex, function( comment, payload )
        {
            return decodeURIComponent( payload );
        });
}
302 \r
// Replaces every piece of source that must survive untouched with an HTML
// comment holding its URI-encoded text.
//
// Protected pieces are: HTML comments, <script> and <noscript> elements,
// and whatever the regular expressions in protectRegexes match.
// protectRegexes may be omitted.
//
// The result uses "<!--{cke_protected}...-->" for protected source and
// "<!--{cke_protected}{C}...-->" for real comments.
function protectSource( data, protectRegexes )
{
    var protectedHtml = [],
        tempRegex = /<\!--\{cke_temp(comment)?\}(\d*?)-->/g;

    var regexes =
        [
            // Script tags will also be forced to be protected, otherwise
            // IE will execute them.
            ( /<script[\s\S]*?<\/script>/gi ),

            // <noscript> tags (get lost in IE and messed up in FF).
            /<noscript[\s\S]*?<\/noscript>/gi
        ]
        // Without the fallback, a missing list would add "undefined" as a
        // pattern, and replace() would then treat it as the literal text
        // "undefined".
        .concat( protectRegexes || [] );

    // First of any other protection, we must protect all comments
    // to avoid losing them (of course, IE related).
    // Note that we use a different tag for comments, as we need to
    // transform them when applying filters.
    data = data.replace( (/<!--[\s\S]*?-->/g), function( match )
        {
            return '<!--{cke_tempcomment}' + ( protectedHtml.push( match ) - 1 ) + '-->';
        });

    for ( var i = 0 ; i < regexes.length ; i++ )
    {
        data = data.replace( regexes[ i ], function( match )
            {
                // There could be protected source inside another one, so
                // restore it before storing the outer piece. (#3869)
                match = match.replace( tempRegex,
                    function( $, isComment, id )
                    {
                        return protectedHtml[ id ];
                    }
                );
                return '<!--{cke_temp}' + ( protectedHtml.push( match ) - 1 ) + '-->';
            });
    }

    // Turn the temporary placeholders into the final protected comments.
    data = data.replace( tempRegex, function( $, isComment, id )
        {
            return '<!--' + protectedSourceMarker +
                    ( isComment ? '{C}' : '' ) +
                    encodeURIComponent( protectedHtml[ id ] ).replace( /--/g, '%2D%2D' ) +
                    '-->';
        }
    );
    return data;
}
351 \r
// Registers the plugin. On init it gives the editor an htmlDataProcessor
// and loads the default rule sets into its data and HTML filters.
CKEDITOR.plugins.add( 'htmldataprocessor',
{
    requires : [ 'htmlwriter' ],

    init : function( editor )
    {
        var processor = new CKEDITOR.htmlDataProcessor( editor );
        editor.dataProcessor = processor;

        processor.writer.forceSimpleAmpersand = editor.config.forceSimpleAmpersand;

        // Rules applied to the data filter.
        processor.dataFilter.addRules( defaultDataFilterRules );
        processor.dataFilter.addRules( defaultDataBlockFilterRules );

        // Rules applied to the HTML filter.
        processor.htmlFilter.addRules( defaultHtmlFilterRules );
        processor.htmlFilter.addRules( defaultHtmlBlockFilterRules );
    }
});
368 \r
// Constructor. Keeps a reference to the editor and creates the writer used
// by toDataFormat plus the two filters: dataFilter (used by toHtml) and
// htmlFilter (used by toDataFormat).
CKEDITOR.htmlDataProcessor = function( editor )
{
    var parser = CKEDITOR.htmlParser;

    this.editor = editor;

    this.writer = new CKEDITOR.htmlWriter();
    this.dataFilter = new parser.filter();
    this.htmlFilter = new parser.filter();
};
377 \r
CKEDITOR.htmlDataProcessor.prototype =
{
    // Converts source data into the HTML loaded in the editing area. The
    // data is already HTML; it is protected, passed through the browser to
    // fix its structure, then parsed and run through the data filter.
    toHtml : function( data, fixForBody )
    {
        var html = protectSource( data, this.editor.config.protectedSource );

        // The URL attributes must be protected first, as the browser may
        // change them when innerHTML is set further down.
        html = protectAttributes( html );

        // Protect elements that can't be set inside a DIV. E.g. IE removes
        // style tags from innerHTML. (#3710)
        html = protectElements( html );

        // Certain elements have problems going through DOM operations;
        // protect them by prefixing the 'cke' namespace. (#3591)
        html = protectElementsNames( html );

        // All non-IE browsers ignore self-closed custom elements, so turn
        // them into open-close pairs. (#3591)
        html = protectSelfClosingElements( html );

        // Let the browser fix a possibly invalid HTML structure. A fake
        // leading character works around the IE comments bug. (#3801)
        var container = new CKEDITOR.dom.element( 'div' );
        container.setHtml( 'a' + html );
        html = container.getHtml().substr( 1 );

        // Unprotect "some" of the protected elements at this point.
        html = unprotectElementNames( html );
        html = unprotectElements( html );

        // Restore the protected comments, so that they can be properly
        // filtered.
        html = unprotectRealComments( html );

        // Now use our parser to make further fixes to the structure, as
        // well as apply the filter.
        var parsed = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody ),
            output = new CKEDITOR.htmlParser.basicWriter();

        parsed.writeHtml( output, this.dataFilter );

        // Protect the real comments again.
        return protectRealComments( output.getHtml( true ) );
    },

    // Converts the editing area HTML into output data: the HTML is parsed
    // and written with this.writer through the HTML filter.
    toDataFormat : function( html, fixForBody )
    {
        var output = this.writer,
            parsed = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody );

        output.reset();
        parsed.writeHtml( output, this.htmlFilter );

        return output.getHtml( true );
    }
};
446 })();\r
447 \r
/**
 * Whether a plain "&" is written instead of "&amp;amp;" in the values of
 * element attributes. Changing this setting is not recommended, for the
 * sake of compliance with the W3C XHTML 1.0 standards
 * (<a href="http://www.w3.org/TR/xhtml1/#C_12">C.12, XHTML 1.0</a>).
 * @type Boolean
 * @default false
 * @example
 * config.forceSimpleAmpersand = false;
 */
CKEDITOR.config.forceSimpleAmpersand = false;