JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.4.3
[ckeditor.git] / _source / plugins / htmldataprocessor / plugin.js
1 /*\r
2 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
6 (function()\r
7 {\r
8         // Regex to scan for   at the end of blocks, which are actually placeholders.\r
9         // Safari transforms the   to \xa0. (#4172)\r
10         var tailNbspRegex = /^[\t\r\n ]*(?: |\xa0)$/;\r
11 \r
12         var protectedSourceMarker = '{cke_protected}';\r
13 \r
14         // Return the last non-space child node of the block (#4344).\r
15         function lastNoneSpaceChild( block )\r
16         {\r
17                 var lastIndex = block.children.length,\r
18                         last = block.children[ lastIndex - 1 ];\r
19                 while (  last && last.type == CKEDITOR.NODE_TEXT && !CKEDITOR.tools.trim( last.value ) )\r
20                         last = block.children[ --lastIndex ];\r
21                 return last;\r
22         }\r
23 \r
24         function trimFillers( block, fromSource )\r
25         {\r
26                 // If the current node is a block, and if we're converting from source or\r
27                 // we're not in IE then search for and remove any tailing BR node.\r
28                 //\r
29                 // Also, any   at the end of blocks are fillers, remove them as well.\r
30                 // (#2886)\r
31                 var children = block.children, lastChild = lastNoneSpaceChild( block );\r
32                 if ( lastChild )\r
33                 {\r
34                         if ( ( fromSource || !CKEDITOR.env.ie ) && lastChild.type == CKEDITOR.NODE_ELEMENT && lastChild.name == 'br' )\r
35                                 children.pop();\r
36                         if ( lastChild.type == CKEDITOR.NODE_TEXT && tailNbspRegex.test( lastChild.value ) )\r
37                                 children.pop();\r
38                 }\r
39         }\r
40 \r
41         function blockNeedsExtension( block, fromSource )\r
42         {\r
43         // 1. For IE version >=8,  empty blocks are displayed correctly themself in wysiwiyg;\r
44         // 2. For the rest, at least table cell and list item need no filler space.\r
45         // (#6248)\r
46         if ( fromSource && CKEDITOR.env.ie &&\r
47                 ( document.documentMode > 7\r
48                 || block.name in CKEDITOR.dtd.tr\r
49                 || block.name in CKEDITOR.dtd.$listItem ) )\r
50             return false;\r
51 \r
52                 var lastChild = lastNoneSpaceChild( block );\r
53 \r
54                 return !lastChild\r
55                         || lastChild.type == CKEDITOR.NODE_ELEMENT && lastChild.name == 'br'\r
56                         // Some of the controls in form needs extension too,\r
57                         // to move cursor at the end of the form. (#4791)\r
58                         || block.name == 'form' && lastChild.name == 'input';\r
59         }\r
60 \r
61         function extendBlockForDisplay( block )\r
62         {\r
63                 trimFillers( block, true );\r
64 \r
65                 if ( blockNeedsExtension( block, true ) )\r
66                 {\r
67                         if ( CKEDITOR.env.ie )\r
68                                 block.add( new CKEDITOR.htmlParser.text( '\xa0' ) );\r
69                         else\r
70                                 block.add( new CKEDITOR.htmlParser.element( 'br', {} ) );\r
71                 }\r
72         }\r
73 \r
74         function extendBlockForOutput( block )\r
75         {\r
76                 trimFillers( block );\r
77 \r
78                 if ( blockNeedsExtension( block ) )\r
79                         block.add( new CKEDITOR.htmlParser.text( '\xa0' ) );\r
80         }\r
81 \r
82         var dtd = CKEDITOR.dtd;\r
83 \r
84         // Find out the list of block-like tags that can contain <br>.\r
85         var blockLikeTags = CKEDITOR.tools.extend( {}, dtd.$block, dtd.$listItem, dtd.$tableContent );\r
86         for ( var i in blockLikeTags )\r
87         {\r
88                 if ( ! ( 'br' in dtd[i] ) )\r
89                         delete blockLikeTags[i];\r
90         }\r
91         // We just avoid filler in <pre> right now.\r
92         // TODO: Support filler for <pre>, line break is also occupy line height.\r
93         delete blockLikeTags.pre;\r
94         var defaultDataFilterRules =\r
95         {\r
96                 elements : {},\r
97                 attributeNames :\r
98                 [\r
99                         // Event attributes (onXYZ) must not be directly set. They can become\r
100                         // active in the editing area (IE|WebKit).\r
101                         [ ( /^on/ ), '_cke_pa_on' ]\r
102                 ]\r
103         };\r
104 \r
105         var defaultDataBlockFilterRules = { elements : {} };\r
106 \r
107         for ( i in blockLikeTags )\r
108                 defaultDataBlockFilterRules.elements[ i ] = extendBlockForDisplay;\r
109 \r
110         var defaultHtmlFilterRules =\r
111                 {\r
112                         elementNames :\r
113                         [\r
114                                 // Remove the "cke:" namespace prefix.\r
115                                 [ ( /^cke:/ ), '' ],\r
116 \r
117                                 // Ignore <?xml:namespace> tags.\r
118                                 [ ( /^\?xml:namespace$/ ), '' ]\r
119                         ],\r
120 \r
121                         attributeNames :\r
122                         [\r
123                                 // Attributes saved for changes and protected attributes.\r
124                                 [ ( /^_cke_(saved|pa)_/ ), '' ],\r
125 \r
126                                 // All "_cke" attributes are to be ignored.\r
127                                 [ ( /^_cke.*/ ), '' ],\r
128 \r
129                                 [ 'hidefocus', '' ]\r
130                         ],\r
131 \r
132                         elements :\r
133                         {\r
134                                 $ : function( element )\r
135                                 {\r
136                                         var attribs = element.attributes;\r
137 \r
138                                         if ( attribs )\r
139                                         {\r
140                                                 // Elements marked as temporary are to be ignored.\r
141                                                 if ( attribs.cke_temp )\r
142                                                         return false;\r
143 \r
144                                                 // Remove duplicated attributes - #3789.\r
145                                                 var attributeNames = [ 'name', 'href', 'src' ],\r
146                                                         savedAttributeName;\r
147                                                 for ( var i = 0 ; i < attributeNames.length ; i++ )\r
148                                                 {\r
149                                                         savedAttributeName = '_cke_saved_' + attributeNames[ i ];\r
150                                                         savedAttributeName in attribs && ( delete attribs[ attributeNames[ i ] ] );\r
151                                                 }\r
152                                         }\r
153 \r
154                                         return element;\r
155                                 },\r
156 \r
157                                 embed : function( element )\r
158                                 {\r
159                                         var parent = element.parent;\r
160 \r
161                                         // If the <embed> is child of a <object>, copy the width\r
162                                         // and height attributes from it.\r
163                                         if ( parent && parent.name == 'object' )\r
164                                         {\r
165                                                 var parentWidth = parent.attributes.width,\r
166                                                         parentHeight = parent.attributes.height;\r
167                                                 parentWidth && ( element.attributes.width = parentWidth );\r
168                                                 parentHeight && ( element.attributes.height = parentHeight );\r
169                                         }\r
170                                 },\r
171                                 // Restore param elements into self-closing.\r
172                                 param : function( param )\r
173                                 {\r
174                                         param.children = [];\r
175                                         param.isEmpty = true;\r
176                                         return param;\r
177                                 },\r
178 \r
179                                 // Remove empty link but not empty anchor.(#3829)\r
180                                 a : function( element )\r
181                                 {\r
182                                         if ( !( element.children.length ||\r
183                                                         element.attributes.name ||\r
184                                                         element.attributes._cke_saved_name ) )\r
185                                         {\r
186                                                 return false;\r
187                                         }\r
188                                 },\r
189 \r
190                                 html : function( element )\r
191                                 {\r
192                                         delete element.attributes.contenteditable;\r
193                                         delete element.attributes[ 'class' ];\r
194                                 },\r
195 \r
196                                 body : function( element )\r
197                                 {\r
198                                         delete element.attributes.spellcheck;\r
199                                         delete element.attributes.contenteditable;\r
200                                 },\r
201 \r
202                                 style : function( element )\r
203                                 {\r
204                                         var child = element.children[ 0 ];\r
205                                         child && child.value && ( child.value = CKEDITOR.tools.trim( child.value ));\r
206 \r
207                                         if ( !element.attributes.type )\r
208                                                 element.attributes.type = 'text/css';\r
209                                 },\r
210 \r
211                                 title : function( element )\r
212                                 {\r
213                                         var titleText = element.children[ 0 ];\r
214                                         titleText && ( titleText.value = element.attributes[ '_cke_title' ] || '' );\r
215                                 }\r
216                         },\r
217 \r
218                         attributes :\r
219                         {\r
220                                 'class' : function( value, element )\r
221                                 {\r
222                                         // Remove all class names starting with "cke_".\r
223                                         return CKEDITOR.tools.ltrim( value.replace( /(?:^|\s+)cke_[^\s]*/g, '' ) ) || false;\r
224                                 }\r
225                         },\r
226 \r
227                         comment : function( contents )\r
228                         {\r
229                                 // If this is a comment for protected source.\r
230                                 if ( contents.substr( 0, protectedSourceMarker.length ) == protectedSourceMarker )\r
231                                 {\r
232                                         // Remove the extra marker for real comments from it.\r
233                                         if ( contents.substr( protectedSourceMarker.length, 3 ) == '{C}' )\r
234                                                 contents = contents.substr( protectedSourceMarker.length + 3 );\r
235                                         else\r
236                                                 contents = contents.substr( protectedSourceMarker.length );\r
237 \r
238                                         return new CKEDITOR.htmlParser.cdata( decodeURIComponent( contents ) );\r
239                                 }\r
240 \r
241                                 return contents;\r
242                         }\r
243                 };\r
244 \r
245         var defaultHtmlBlockFilterRules = { elements : {} };\r
246 \r
247         for ( i in blockLikeTags )\r
248                 defaultHtmlBlockFilterRules.elements[ i ] = extendBlockForOutput;\r
249 \r
250         if ( CKEDITOR.env.ie )\r
251         {\r
252                 // IE outputs style attribute in capital letters. We should convert\r
253                 // them back to lower case.\r
254                 defaultHtmlFilterRules.attributes.style = function( value, element )\r
255                 {\r
256                         return value.toLowerCase();\r
257                 };\r
258         }\r
259 \r
260         function protectReadOnly( element )\r
261         {\r
262                 element.attributes.contenteditable = "false";\r
263         }\r
264         function unprotectReadyOnly( element )\r
265         {\r
266                 delete element.attributes.contenteditable;\r
267         }\r
268         // Disable form elements editing mode provided by some browers. (#5746)\r
269         for ( i in { input : 1, textarea : 1 } )\r
270         {\r
271                 defaultDataFilterRules.elements[ i ] = protectReadOnly;\r
272                 defaultHtmlFilterRules.elements[ i ] = unprotectReadyOnly;\r
273         }\r
274 \r
275         var protectAttributeRegex = /<((?:a|area|img|input)\b[\s\S]*?\s)((href|src|name)\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|(?:[^ "'>]+)))([^>]*)>/gi,\r
276                 findSavedSrcRegex = /\s_cke_saved_src\s*=/;\r
277 \r
278         var protectElementsRegex = /(?:<style(?=[ >])[^>]*>[\s\S]*<\/style>)|(?:<(:?link|meta|base)[^>]*>)/gi,\r
279                 encodedElementsRegex = /<cke:encoded>([^<]*)<\/cke:encoded>/gi;\r
280 \r
281         var protectElementNamesRegex = /(<\/?)((?:object|embed|param|html|body|head|title)[^>]*>)/gi,\r
282                 unprotectElementNamesRegex = /(<\/?)cke:((?:html|body|head|title)[^>]*>)/gi;\r
283 \r
284         var protectSelfClosingRegex = /<cke:(param|embed)([^>]*?)\/?>(?!\s*<\/cke:\1)/gi;\r
285 \r
286         function protectAttributes( html )\r
287         {\r
288                 return html.replace( protectAttributeRegex, function( tag, beginning, fullAttr, attrName, end )\r
289                         {\r
290                                 // We should not rewrite the _cke_saved_src attribute (#5218)\r
291                                 if ( attrName == 'src' && findSavedSrcRegex.test( tag ) )\r
292                                         return tag;\r
293                                 else\r
294                                         return '<' + beginning + fullAttr + ' _cke_saved_' + fullAttr + end + '>';\r
295                         });\r
296         }\r
297 \r
298         function protectElements( html )\r
299         {\r
300                 return html.replace( protectElementsRegex, function( match )\r
301                         {\r
302                                 return '<cke:encoded>' + encodeURIComponent( match ) + '</cke:encoded>';\r
303                         });\r
304         }\r
305 \r
306         function unprotectElements( html )\r
307         {\r
308                 return html.replace( encodedElementsRegex, function( match, encoded )\r
309                         {\r
310                                 return decodeURIComponent( encoded );\r
311                         });\r
312         }\r
313 \r
314         function protectElementsNames( html )\r
315         {\r
316                 return html.replace( protectElementNamesRegex, '$1cke:$2');\r
317         }\r
318 \r
319         function unprotectElementNames( html )\r
320         {\r
321                 return html.replace( unprotectElementNamesRegex, '$1$2' );\r
322         }\r
323 \r
324         function protectSelfClosingElements( html )\r
325         {\r
326                 return html.replace( protectSelfClosingRegex, '<cke:$1$2></cke:$1>' );\r
327         }\r
328 \r
329         function protectPreFormatted( html )\r
330         {\r
331                 return html.replace( /(<pre\b[^>]*>)(\r\n|\n)/g, '$1$2$2' );\r
332         }\r
333 \r
334         function protectRealComments( html )\r
335         {\r
336                 return html.replace( /<!--(?!{cke_protected})[\s\S]+?-->/g, function( match )\r
337                         {\r
338                                 return '<!--' + protectedSourceMarker +\r
339                                                 '{C}' +\r
340                                                 encodeURIComponent( match ).replace( /--/g, '%2D%2D' ) +\r
341                                                 '-->';\r
342                         });\r
343         }\r
344 \r
345         function unprotectRealComments( html )\r
346         {\r
347                 return html.replace( /<!--\{cke_protected\}\{C\}([\s\S]+?)-->/g, function( match, data )\r
348                         {\r
349                                 return decodeURIComponent( data );\r
350                         });\r
351         }\r
352 \r
353         function protectSource( data, protectRegexes )\r
354         {\r
355                 var protectedHtml = [],\r
356                         tempRegex = /<\!--\{cke_temp(comment)?\}(\d*?)-->/g;\r
357 \r
358                 var regexes =\r
359                         [\r
360                                 // Script tags will also be forced to be protected, otherwise\r
361                                 // IE will execute them.\r
362                                 ( /<script[\s\S]*?<\/script>/gi ),\r
363 \r
364                                 // <noscript> tags (get lost in IE and messed up in FF).\r
365                                 /<noscript[\s\S]*?<\/noscript>/gi\r
366                         ]\r
367                         .concat( protectRegexes );\r
368 \r
369                 // First of any other protection, we must protect all comments\r
370                 // to avoid loosing them (of course, IE related).\r
371                 // Note that we use a different tag for comments, as we need to\r
372                 // transform them when applying filters.\r
373                 data = data.replace( (/<!--[\s\S]*?-->/g), function( match )\r
374                         {\r
375                                 return  '<!--{cke_tempcomment}' + ( protectedHtml.push( match ) - 1 ) + '-->';\r
376                         });\r
377 \r
378                 for ( var i = 0 ; i < regexes.length ; i++ )\r
379                 {\r
380                         data = data.replace( regexes[i], function( match )\r
381                                 {\r
382                                         match = match.replace( tempRegex,               // There could be protected source inside another one. (#3869).\r
383                                                 function( $, isComment, id )\r
384                                                 {\r
385                                                         return protectedHtml[ id ];\r
386                                                 }\r
387                                         );\r
388                                         return  '<!--{cke_temp}' + ( protectedHtml.push( match ) - 1 ) + '-->';\r
389                                 });\r
390                 }\r
391                 data = data.replace( tempRegex, function( $, isComment, id )\r
392                         {\r
393                                 return '<!--' + protectedSourceMarker +\r
394                                                 ( isComment ? '{C}' : '' ) +\r
395                                                 encodeURIComponent( protectedHtml[ id ] ).replace( /--/g, '%2D%2D' ) +\r
396                                                 '-->';\r
397                         }\r
398                 );\r
399                 return data;\r
400         }\r
401 \r
402         CKEDITOR.plugins.add( 'htmldataprocessor',\r
403         {\r
404                 requires : [ 'htmlwriter' ],\r
405 \r
406                 init : function( editor )\r
407                 {\r
408                         var dataProcessor = editor.dataProcessor = new CKEDITOR.htmlDataProcessor( editor );\r
409 \r
410                         dataProcessor.writer.forceSimpleAmpersand = editor.config.forceSimpleAmpersand;\r
411 \r
412                         dataProcessor.dataFilter.addRules( defaultDataFilterRules );\r
413                         dataProcessor.dataFilter.addRules( defaultDataBlockFilterRules );\r
414                         dataProcessor.htmlFilter.addRules( defaultHtmlFilterRules );\r
415                         dataProcessor.htmlFilter.addRules( defaultHtmlBlockFilterRules );\r
416                 }\r
417         });\r
418 \r
419         CKEDITOR.htmlDataProcessor = function( editor )\r
420         {\r
421                 this.editor = editor;\r
422 \r
423                 this.writer = new CKEDITOR.htmlWriter();\r
424                 this.dataFilter = new CKEDITOR.htmlParser.filter();\r
425                 this.htmlFilter = new CKEDITOR.htmlParser.filter();\r
426         };\r
427 \r
428         CKEDITOR.htmlDataProcessor.prototype =\r
429         {\r
430                 toHtml : function( data, fixForBody )\r
431                 {\r
432                         // The source data is already HTML, but we need to clean\r
433                         // it up and apply the filter.\r
434 \r
435                         data = protectSource( data, this.editor.config.protectedSource );\r
436 \r
437                         // Before anything, we must protect the URL attributes as the\r
438                         // browser may changing them when setting the innerHTML later in\r
439                         // the code.\r
440                         data = protectAttributes( data );\r
441 \r
442                         // Protect elements than can't be set inside a DIV. E.g. IE removes\r
443                         // style tags from innerHTML. (#3710)\r
444                         data = protectElements( data );\r
445 \r
446                         // Certain elements has problem to go through DOM operation, protect\r
447                         // them by prefixing 'cke' namespace. (#3591)\r
448                         data = protectElementsNames( data );\r
449 \r
450                         // All none-IE browsers ignore self-closed custom elements,\r
451                         // protecting them into open-close. (#3591)\r
452                         data = protectSelfClosingElements( data );\r
453 \r
454                         // Compensate one leading line break after <pre> open as browsers\r
455                         // eat it up. (#5789)\r
456                         data = protectPreFormatted( data );\r
457 \r
458                         // Call the browser to help us fixing a possibly invalid HTML\r
459                         // structure.\r
460                         var div = new CKEDITOR.dom.element( 'div' );\r
461                         // Add fake character to workaround IE comments bug. (#3801)\r
462                         div.setHtml( 'a' + data );\r
463                         data = div.getHtml().substr( 1 );\r
464 \r
465                         // Unprotect "some" of the protected elements at this point.\r
466                         data = unprotectElementNames( data );\r
467 \r
468                         data = unprotectElements( data );\r
469 \r
470                         // Restore the comments that have been protected, in this way they\r
471                         // can be properly filtered.\r
472                         data = unprotectRealComments( data );\r
473 \r
474                         // Now use our parser to make further fixes to the structure, as\r
475                         // well as apply the filter.\r
476                         var fragment = CKEDITOR.htmlParser.fragment.fromHtml( data, fixForBody ),\r
477                                 writer = new CKEDITOR.htmlParser.basicWriter();\r
478 \r
479                         fragment.writeHtml( writer, this.dataFilter );\r
480                         data = writer.getHtml( true );\r
481 \r
482                         // Protect the real comments again.\r
483                         data = protectRealComments( data );\r
484 \r
485                         return data;\r
486                 },\r
487 \r
488                 toDataFormat : function( html, fixForBody )\r
489                 {\r
490                         var writer = this.writer,\r
491                                 fragment = CKEDITOR.htmlParser.fragment.fromHtml( html, fixForBody );\r
492 \r
493                         writer.reset();\r
494 \r
495                         fragment.writeHtml( writer, this.htmlFilter );\r
496 \r
497                         return writer.getHtml( true );\r
498                 }\r
499         };\r
500 })();\r
501 \r
/**
 * Whether to force using "&" instead of "&amp;amp;" in elements attributes
 * values. Changing this setting is not recommended, for compliance with the
 * W3C XHTML 1.0 standards (<a href="http://www.w3.org/TR/xhtml1/#C_12">C.12, XHTML 1.0</a>).
 * @name CKEDITOR.config.forceSimpleAmpersand
 * @type Boolean
 * @default false
 * @example
 * config.forceSimpleAmpersand = false;
 */