JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
f69f7cdca5a42b1ea0b63a5b353e5ca20a81f35b
[ckeditor.git] / _source / core / htmlparser / fragment.js
1 /*\r
2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * A lightweight representation of an HTML DOM structure.
 * A freshly created fragment has no children and no parent.
 * @constructor
 * @example
 */
CKEDITOR.htmlParser.fragment = function()
{
	// Bookkeeping flags, exposed through the private "_" property.
	var privateState = {};
	privateState.isBlockLike = true;
	privateState.hasInlineStarted = false;

	/**
	 * The nodes contained in the root of this fragment.
	 * @type Array
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children.length );  // "2"
	 */
	this.children = [];

	/**
	 * Get the fragment parent. Should always be null.
	 * @type Object
	 * @default null
	 * @example
	 */
	this.parent = null;

	/** @private */
	this._ = privateState;
};
37 \r
38 (function()\r
39 {\r
// Short alias for the DTD tables consulted while building the lookups below.
var dtd = CKEDITOR.dtd;

// Block-level elements whose internal structure should be respected during
// parser fixing: the table and list roots plus everything their DTDs list.
var nonBreakingBlocks = CKEDITOR.tools.extend( { table:1,ul:1,ol:1,dl:1 }, dtd.table, dtd.ul, dtd.ol, dtd.dl );

// IE < 8 don't output the close tag on definition list items. (#6975)
// Every other environment gets an empty map.
var optionalCloseTags = {};
if ( CKEDITOR.env.ie && CKEDITOR.env.version < 8 )
	optionalCloseTags = { dd : 1, dt :1 };

// The two list containers, used when repairing malformed nested lists.
var listBlocks = { ol:1, ul:1 };

// Dtd of the fragment element, basically it accepts anything except for
// intermediate structure, e.g. orphan <li>.
var rootDtd = CKEDITOR.tools.extend( {}, { html: 1 }, dtd.html, dtd.body, dtd.head, { style:1,script:1 } );
51 \r
/**
 * Tells whether an element should be dropped when it ends up empty.
 * @param {Object} node Element exposing "name" and "attributes".
 * @returns A truthy value (the link's href, or the CKEDITOR.dtd.$removeEmpty
 *		entry for the element name) when the element is removable, a falsy
 *		value otherwise.
 */
function isRemoveEmpty( node )
{
	// Empty link is to be removed when empty but not anchor. (#7894)
	if ( node.name == 'a' )
	{
		var href = node.attributes.href;
		if ( href )
			return href;
	}

	return CKEDITOR.dtd.$removeEmpty[ node.name ];
}
58 \r
59         /**\r
60          * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.\r
61          * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.\r
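62          * @param {String|Boolean} [fixForBody=false] Name of the element used to wrap content that cannot sit directly in the body; a falsy value disables the wrapping.\r
63          * @param {CKEDITOR.htmlParser.element} [contextNode] Parse the html as the content of this element. When omitted, a new fragment is created.\r
64          * @returns {CKEDITOR.htmlParser.fragment} The fragment created, or contextNode when one was given.\r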
65          * @example\r
66          * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );\r
67          * alert( fragment.children[0].name );  "b"\r
68          * alert( fragment.children[1].value );  " Text"\r
69          */\r
70         CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody, contextNode )\r
71         {\r
72                 var parser = new CKEDITOR.htmlParser(),\r
73                         fragment = contextNode || new CKEDITOR.htmlParser.fragment(),\r
74                         pendingInline = [],\r
75                         pendingBRs = [],\r
76                         currentNode = fragment,\r
77                     // Indicate we're inside a <textarea> element, spaces should be touched differently.\r
78                         inTextarea = false,\r
79                     // Indicate we're inside a <pre> element, spaces should be touched differently.\r
80                         inPre = false;\r
81 \r
/*
 * Re-opens the queued inline elements (pendingInline) inside the current
 * node. An entry is re-opened when the current node's DTD allows it and,
 * if a tag is about to be opened, when that tag may live inside the entry.
 *
 * @param {String} [newTagName] Name of the tag about to be opened, if any.
 */
function checkPending( newTagName )
{
	var brsFlushed = false,
		index = 0;

	while ( index < pendingInline.length )
	{
		var queued = pendingInline[ index ],
			queuedName = queued.name,
			queuedDtd = CKEDITOR.dtd[ queuedName ],
			hostDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

		var hostAccepts = !hostDtd || hostDtd[ queuedName ];

		if ( hostAccepts && ( !newTagName || !queuedDtd || queuedDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
		{
			// Pending BRs go out once, before the first element is re-opened.
			if ( !brsFlushed )
			{
				sendPendingBRs();
				brsFlushed = true;
			}

			// Work on a clone, hang it under the current node and make it
			// the current node so upcoming content lands inside of it.
			var reopened = queued.clone();
			reopened.parent = currentNode;
			currentNode = reopened;

			// Removing the entry shifts the next one into this slot, so the
			// index stays where it is.
			pendingInline.splice( index, 1 );
			continue;
		}

		// Some element of the same type cannot be nested, flat them,
		// e.g. <a href="#">foo<a href="#">bar</a></a>. (#7894)
		if ( queuedName == currentNode.name )
		{
			// Close the current node, then re-examine the same entry.
			addElement( currentNode, currentNode.parent, 1 );
			continue;
		}

		index++;
	}
}
126 \r
/*
 * Appends every queued <br> (pendingBRs) to the current node, oldest first,
 * leaving the queue empty. Takes no parameters; any argument is ignored.
 */
function sendPendingBRs()
{
	while ( pendingBRs.length > 0 )
	{
		var br = pendingBRs.shift();
		currentNode.add( br );
	}
}
132 \r
/*
 * Appends an element to a target node. On top of the plain append it also
 * wraps content that cannot sit directly in the body (when fixForBody is set)
 * and trims trailing spaces at the end of block-like elements.
 *
 * Elements whose "previous" property is already defined are considered
 * added and are ignored.
 *
 * @param {Element} element The element to be added as the last child of the target.
 * @param {Element} [target] The parent receiving the element. Defaults to the
 * current node, then to the fragment.
 * @param {Boolean} [moveCurrent=false] When the element carries no return
 * point: true moves "currentNode" onto the target, false restores the
 * "currentNode" that was in effect on entry. A return point on the element
 * always wins and is consumed.
 */
function addElement( element, target, moveCurrent )
{
	// Ignore any element that has already been added.
	if ( element.previous !== undefined )
		return;

	target = target || currentNode || fragment;

	// The body fix below may move the current node, remember where it was.
	var currentOnEntry = currentNode;

	var targetIsBodyLevel = fixForBody && ( !target.type || target.name == 'body' );

	if ( targetIsBodyLevel )
	{
		// Fake elements are judged by the type of the element they stand for.
		var attrs = element.attributes,
			effectiveName = ( attrs && attrs[ 'data-cke-real-element-type' ] ) || element.name;

		if ( effectiveName && !( effectiveName in CKEDITOR.dtd.$body ) && effectiveName != 'body' && !element.isOrphan )
		{
			// Open the wrapper element inside the target.
			currentNode = target;
			parser.onTagOpen( fixForBody, {} );

			// The wrapper is now both the target and the return point.
			target = currentNode;
			element.returnPoint = target;
		}
	}

	// Rtrim empty spaces on block end boundary. (#3585)
	var canTrimTail = element._.isBlockLike && element.name != 'pre' && element.name != 'textarea';

	if ( canTrimTail )
	{
		var kids = element.children,
			count = kids.length,
			tail = kids[ count - 1 ];

		if ( tail && tail.type == CKEDITOR.NODE_TEXT )
		{
			var trimmed = CKEDITOR.tools.rtrim( tail.value );

			if ( trimmed )
				tail.value = trimmed;
			else
				kids.length = count - 1;		// Nothing but spaces: drop the node.
		}
	}

	target.add( element );

	var resumeAt = element.returnPoint;
	if ( resumeAt )
	{
		currentNode = resumeAt;
		delete element.returnPoint;
	}
	else if ( moveCurrent )
		currentNode = target;
	else
		currentNode = currentOnEntry;
}
205 \r
/*
 * Parser callback for an opening tag. Creates the element, queues it when
 * it is a removable-if-empty inline or a <br>, and otherwise climbs or
 * repairs the open-element chain until the element fits, then makes it the
 * current node (or adds it right away when it is an empty element).
 *
 * @param {String} tagName Name of the opened tag.
 * @param {Object} attributes Attributes of the tag.
 * @param {Boolean} [selfClosing] Whether the parser saw a self-closing tag.
 * @param {Boolean} [optionalClose] Marks the element as having an optional close tag.
 */
parser.onTagOpen = function( tagName, attributes, selfClosing, optionalClose )
{
	var element = new CKEDITOR.htmlParser.element( tagName, attributes );

	// "isEmpty" will be always "false" for unknown elements, so we
	// must force it if the parser has identified it as a selfClosing tag.
	if ( element.isUnknown && selfClosing )
		element.isEmpty = true;

	// Check for optional closed elements, including browser quirks and manually opened blocks.
	element.isOptionalClose = tagName in optionalCloseTags || optionalClose;

	// This is a tag to be removed if empty, so do not add it immediately.
	if ( isRemoveEmpty( element ) )
	{
		pendingInline.push( element );
		return;
	}

	if ( tagName == 'br' )
	{
		// Inside <pre> a line break becomes a newline character, anywhere
		// else it is queued until following content shows up.
		if ( inPre )
			currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
		else
			pendingBRs.push( element );
		return;
	}

	if ( tagName == 'pre' )
		inPre = true;
	else if ( tagName == 'textarea' )
		inTextarea = true;

	for ( ;; )
	{
		var hostName = currentNode.name,
			hostDtd;

		// Nameless current node means the fragment root. Elements missing
		// from the DTD are treated like <div> or <span>.
		if ( !hostName )
			hostDtd = rootDtd;
		else
			hostDtd = CKEDITOR.dtd[ hostName ] || ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span );

		// The element fits here (or one side is unknown): stop fixing.
		if ( element.isUnknown || currentNode.isUnknown || hostDtd[ tagName ] )
			break;

		if ( currentNode.isOptionalClose )
		{
			// Current node doesn't have a close tag, time for a close
			// as this element isn't fit in. (#7497)
			parser.onTagClose( hostName );
		}
		else if ( tagName in listBlocks && hostName in listBlocks )
		{
			// Fixing malformed nested lists by moving it into a previous list item. (#3828)
			var items = currentNode.children,
				lastItem = items[ items.length - 1 ];

			// Establish the list item if it's not existed.
			if ( !lastItem || lastItem.name != 'li' )
			{
				lastItem = new CKEDITOR.htmlParser.element( 'li' );
				addElement( lastItem, currentNode );
			}

			if ( !element.returnPoint )
				element.returnPoint = currentNode;
			currentNode = lastItem;
		}
		else if ( tagName in CKEDITOR.dtd.$listItem && hostName != tagName )
		{
			// Establish new list root for orphan list items.
			parser.onTagOpen( tagName == 'li' ? 'ul' : 'dl', {}, 0, 1 );
		}
		else if ( hostName in nonBreakingBlocks && hostName != tagName )
		{
			// We're inside a structural block like table and list, AND the incoming element
			// is not of the same type (e.g. <td>td1<td>td2</td>), we simply add this new one before it,
			// and most importantly, return back to here once this element is added,
			// e.g. <table><tr><td>td1</td><p>p1</p><td>td2</td></tr></table>
			if ( !element.returnPoint )
				element.returnPoint = currentNode;
			currentNode = currentNode.parent;
		}
		else
		{
			// The current element is an inline element, which
			// need to be continued even after the close, so put
			// it in the pending list.
			if ( hostName in CKEDITOR.dtd.$inline )
				pendingInline.unshift( currentNode );

			// No parent left to climb to: accept the element as an orphan.
			if ( !currentNode.parent )
			{
				element.isOrphan = 1;
				break;
			}

			// The most common case where we just need to close the
			// current one and append the new one to the parent.
			addElement( currentNode, currentNode.parent, 1 );
		}
	}

	checkPending( tagName );
	sendPendingBRs();

	element.parent = currentNode;

	if ( element.isEmpty )
		addElement( element );
	else
		currentNode = element;
};
317 \r
/*
 * Parser callback for a closing tag. A matching queued inline is simply
 * dropped from the queue. Otherwise the open-element chain is walked up to
 * the matching element; when one is found, everything opened inside of it
 * is added to its parent and the inline elements crossed on the way are
 * queued for re-opening.
 *
 * @param {String} tagName Name of the closed tag.
 */
parser.onTagClose = function( tagName )
{
	// Check if there is any pending tag to be closed, most recent first.
	var slot = pendingInline.length;
	while ( slot-- )
	{
		// If found, just remove it from the list.
		if ( pendingInline[ slot ].name == tagName )
		{
			pendingInline.splice( slot, 1 );
			return;
		}
	}

	var toClose = [],
		inlineToReopen = [],
		match = currentNode;

	// Walk up, following return points first so they are properly restored.
	for ( ; match != fragment && match.name != tagName ; match = match.returnPoint || match.parent )
	{
		// If this is an inline element, add it to the pending list, if we're
		// really closing one of the parents element later, they will continue
		// after it.
		if ( !match._.isBlockLike )
			inlineToReopen.unshift( match );

		// This node should be added to it's parent at this point. But,
		// it should happen only if the closing tag is really closing
		// one of the nodes. So, for now, we just cache it.
		toClose.push( match );
	}

	// Reaching the fragment means the close tag matched nothing.
	if ( match != fragment )
	{
		// Add all elements that have been found in the above loop.
		for ( var n = 0 ; n < toClose.length ; n++ )
			addElement( toClose[ n ], toClose[ n ].parent );

		currentNode = match;

		if ( currentNode.name == 'pre' )
			inPre = false;
		else if ( currentNode.name == 'textarea' )
			inTextarea = false;

		if ( match._.isBlockLike )
			sendPendingBRs();

		addElement( match, match.parent );

		// The parent should start receiving new nodes now, except if
		// addElement changed the currentNode.
		if ( match == currentNode )
			currentNode = currentNode.parent;

		pendingInline = pendingInline.concat( inlineToReopen );
	}

	if ( tagName == 'body' )
		fixForBody = false;
};
385 \r
/*
 * Parser callback for text. Outside <pre> and <textarea>, leading spaces
 * are trimmed when the current node has no inline content yet (or BRs are
 * pending) and runs of whitespace are collapsed into a single space.
 * Text that trims to nothing at that point is discarded.
 *
 * @param {String} text The text found by the parser.
 */
parser.onText = function( text )
{
	// Trim empty spaces at beginning of text contents except <pre> and <textarea>.
	var atContentStart = !currentNode._.hasInlineStarted || pendingBRs.length;

	if ( atContentStart && !inPre && !inTextarea )
	{
		text = CKEDITOR.tools.ltrim( text );

		if ( text.length === 0 )
			return;
	}

	sendPendingBRs();
	checkPending();

	// Non-blank text sitting directly in the fragment or the body gets the
	// wrapper element opened for it first.
	var needsWrapper = fixForBody
		&& ( !currentNode.type || currentNode.name == 'body' )
		&& CKEDITOR.tools.trim( text );

	if ( needsWrapper )
		this.onTagOpen( fixForBody, {}, 0, 1 );

	// Shrinking consequential spaces into one single for all elements
	// text contents.
	if ( !( inPre || inTextarea ) )
		text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );

	var textNode = new CKEDITOR.htmlParser.text( text );
	currentNode.add( textNode );
};
414 \r
/*
 * Parser callback for a CDATA section: wraps the content in a cdata node
 * and adds it to the current node.
 *
 * @param {String} cdata The CDATA content.
 */
parser.onCDATA = function( cdata )
{
	var cdataNode = new CKEDITOR.htmlParser.cdata( cdata );
	currentNode.add( cdataNode );
};
419 \r
/*
 * Parser callback for a comment. Pending BRs and pending inline elements
 * are flushed first, then the comment node is added to whatever node is
 * current at that point.
 *
 * @param {String} comment The comment text.
 */
parser.onComment = function( comment )
{
	sendPendingBRs();
	checkPending();

	var commentNode = new CKEDITOR.htmlParser.comment( comment );
	currentNode.add( commentNode );
};
426 \r
427                 // Parse it.\r
428                 parser.parse( fragmentHtml );\r
429 \r
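430                 // Send the pending BRs. This was meant to send all of them except one, which we consider an unwanted bogus (#5293). NOTE(review): sendPendingBRs() declares no parameter, so the argument below is ignored and every pending BR is sent - confirm which behavior is intended.\r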
431                 sendPendingBRs( !CKEDITOR.env.ie && 1 );\r
432 \r
433                 // Close all pending nodes, make sure return point is properly restored.\r
434                 while ( currentNode != fragment )\r
435                         addElement( currentNode, currentNode.parent, 1 );\r
436 \r
437                 return fragment;\r
438         };\r
439 \r
CKEDITOR.htmlParser.fragment.prototype =
{
	/**
	 * Adds a node to this fragment.
	 *
	 * When a block-like node is placed right after a text node, the spaces
	 * at the end of that text are trimmed; a text node left empty by the
	 * trimming is removed and the new node takes its place.
	 * @param {Object} node The node to be added. It can be any of of the
	 *		following types: {@link CKEDITOR.htmlParser.element},
	 *		{@link CKEDITOR.htmlParser.text} and
	 *		{@link CKEDITOR.htmlParser.comment}.
	 * @param {Number} [index] From where the insertion happens. Defaults to
	 *		the end of the children list.
	 * @example
	 */
	add : function( node, index )
	{
		isNaN( index ) && ( index = this.children.length );

		var previous = index > 0 ? this.children[ index - 1 ] : null;
		if ( previous )
		{
			// If the block to be appended is following text, trim spaces at
			// the right of it.
			if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
			{
				previous.value = CKEDITOR.tools.rtrim( previous.value );

				// If we have completely cleared the previous node.
				if ( previous.value.length === 0 )
				{
					// Remove that very node (it is the last child only when
					// appending) and add the new node again at the slot it
					// occupied.
					this.children.splice( index - 1, 1 );
					this.add( node, index - 1 );
					return;
				}
			}

			previous.next = node;
		}

		node.previous = previous;
		node.parent = this;

		// When inserting before an existing child, keep the sibling links
		// on both sides of the new node coherent.
		var following = this.children[ index ];
		if ( following )
		{
			node.next = following;
			following.previous = node;
		}

		this.children.splice( index, 0, node );

		this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
	},

	/**
	 * Writes the fragment HTML to a CKEDITOR.htmlWriter.
	 *
	 * While the filter's "onFragment" callback runs, "this.filterChildren()"
	 * is available to filter the children ahead of time; when it has been
	 * called, the children are written without applying the filter again.
	 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
	 * @param {Object} [filter] The filter to apply; it is expected to
	 *		provide an "onFragment" method.
	 * @example
	 * var writer = new CKEDITOR.htmlWriter();
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '&lt;P&gt;&lt;B&gt;Example' );
	 * fragment.writeHtml( writer )
	 * alert( writer.getHtml() );  // "&lt;p&gt;&lt;b&gt;Example&lt;/b&gt;&lt;/p&gt;"
	 */
	writeHtml : function( writer, filter )
	{
		var isChildrenFiltered;
		this.filterChildren = function()
		{
			// Render the filtered children into a scratch writer and parse
			// the result back, so the children list holds filtered nodes.
			// The third argument is kept from the original call; it is not
			// read by writeChildrenHtml as defined on this prototype.
			var scratchWriter = new CKEDITOR.htmlParser.basicWriter();
			this.writeChildrenHtml( scratchWriter, filter, true );
			var html = scratchWriter.getHtml();

			// fromHtml is a factory, it needs no "new".
			this.children = CKEDITOR.htmlParser.fragment.fromHtml( html ).children;
			isChildrenFiltered = 1;
		};

		// Filtering the root fragment before anything else.
		!this.name && filter && filter.onFragment( this );

		this.writeChildrenHtml( writer, isChildrenFiltered ? null : filter );
	},

	/**
	 * Writes the HTML of every child, in order, to the writer.
	 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
	 * @param {Object} [filter] The filter handed over to each child.
	 */
	writeChildrenHtml : function( writer, filter )
	{
		for ( var i = 0 ; i < this.children.length ; i++ )
			this.children[i].writeHtml( writer, filter );
	}
};
518 })();\r