JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
vanilla ckeditor-3.6.3
[ckeditor.git] / _source / core / htmlparser / fragment.js
1 /*\r
2 Copyright (c) 2003-2012, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * Root container of a lightweight, DOM-like tree of parsed HTML nodes.
 * A new instance starts with no children and no parent.
 * @constructor
 * @example
 * var fragment = new CKEDITOR.htmlParser.fragment();
 * alert( fragment.children.length );  "0"
 */
CKEDITOR.htmlParser.fragment = function()
{
        // Internal flags, read through "_" by the parsing code in this file.
        var internals = {};
        internals.isBlockLike = true;
        internals.hasInlineStarted = false;

        /**
         * The nodes contained in the root of this fragment.
         * @type Array
         * @example
         * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
         * alert( fragment.children.length );  "2"
         */
        this.children = [];

        /**
         * The parent of the fragment. A fragment is a root, so this is initialized to null.
         * @type Object
         * @default null
         * @example
         * var fragment = new CKEDITOR.htmlParser.fragment();
         * alert( fragment.parent );  "null"
         */
        this.parent = null;

        /** @private */
        this._ = internals;
};
37 \r
38 (function()\r
39 {\r
        // Block-level elements whose internal structure should be respected during
        // parser fixing: table, ul, ol and dl themselves plus every element name
        // listed in their DTD entries.
        var nonBreakingBlocks = CKEDITOR.tools.extend( { table:1,ul:1,ol:1,dl:1 }, CKEDITOR.dtd.table, CKEDITOR.dtd.ul, CKEDITOR.dtd.ol, CKEDITOR.dtd.dl );

        // IE < 8 doesn't output the close tag on definition list items (#6975), so
        // <dd> and <dt> are treated as optionally closed there; empty elsewhere.
        var optionalCloseTags = CKEDITOR.env.ie && CKEDITOR.env.version < 8 ? { dd : 1, dt :1 } : {};

        // List containers, used to repair malformed nested lists and orphan text.
        var listBlocks = { ol:1, ul:1 };

        // DTD of the fragment root. Basically it accepts anything except intermediate
        // structure, e.g. an orphan <li>.
        var rootDtd = CKEDITOR.tools.extend( {}, { html: 1 }, CKEDITOR.dtd.html, CKEDITOR.dtd.body, CKEDITOR.dtd.head, { style:1,script:1 } );
51 \r
52         function isRemoveEmpty( node )\r
53         {\r
54                 // Empty link is to be removed when empty but not anchor. (#7894)\r
55                 return node.name == 'a' && node.attributes.href\r
56                         || CKEDITOR.dtd.$removeEmpty[ node.name ];\r
57         }\r
58 \r
        /**
         * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
         * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
         * @param {String|Boolean} [fixForBody=false] Name of the block element (e.g. "p") used to
         *              wrap content that cannot be a direct child of the body. A false value
         *              disables the wrapping.
         * @param {CKEDITOR.htmlParser.element} [contextNode] Parse the HTML as the content of this
         *              element instead of a new fragment.
         * @returns {CKEDITOR.htmlParser.fragment} The fragment created (or contextNode when it was provided).
         * @example
         * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
         * alert( fragment.children[0].name );  "b"
         * alert( fragment.children[1].value );  " Text"
         */
70         CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody, contextNode )\r
71         {\r
72                 var parser = new CKEDITOR.htmlParser(),\r
73                         fragment = contextNode || new CKEDITOR.htmlParser.fragment(),\r
74                         pendingInline = [],\r
75                         pendingBRs = [],\r
76                         currentNode = fragment,\r
77                     // Indicate we're inside a <textarea> element, spaces should be touched differently.\r
78                         inTextarea = false,\r
79                     // Indicate we're inside a <pre> element, spaces should be touched differently.\r
80                         inPre = false;\r
81 \r
                /*
                 * Re-opens the elements waiting in "pendingInline" at the current position,
                 * so that content about to be added ends up inside of them.
                 *
                 * @param {String} [newTagName] Name of the element about to be added. When
                 * given, a pending element is only re-opened if its DTD accepts that name
                 * (or if either of them has no DTD entry).
                 */
                function checkPending( newTagName )
                {
                        // Set once the pending <br>s have been flushed, so it happens at most once per call.
                        var pendingBRsSent;

                        if ( pendingInline.length > 0 )
                        {
                                for ( var i = 0 ; i < pendingInline.length ; i++ )
                                {
                                        var pendingElement = pendingInline[ i ],
                                                pendingName = pendingElement.name,
                                                pendingDtd = CKEDITOR.dtd[ pendingName ],
                                                currentDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

                                        // The pending element must be allowed inside the current node, and
                                        // the incoming tag (if any) must be allowed inside the pending element.
                                        if ( ( !currentDtd || currentDtd[ pendingName ] ) && ( !newTagName || !pendingDtd || pendingDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
                                        {
                                                if ( !pendingBRsSent )
                                                {
                                                        sendPendingBRs();
                                                        pendingBRsSent = 1;
                                                }

                                                // Get a clone for the pending element.
                                                pendingElement = pendingElement.clone();

                                                // Add it to the current node and make it the current,
                                                // so the new element will be added inside of it.
                                                pendingElement.parent = currentNode;
                                                currentNode = pendingElement;

                                                // Remove the pending element (back the index by one
                                                // to properly process the next entry).
                                                pendingInline.splice( i, 1 );
                                                i--;
                                        }
                                        else
                                        {
                                                // Some element of the same type cannot be nested, flat them,
                                                // e.g. <a href="#">foo<a href="#">bar</a></a>. (#7894)
                                                // The current node is closed and the same pending entry is
                                                // examined again (hence the index step back).
                                                if ( pendingName == currentNode.name )
                                                        addElement( currentNode, currentNode.parent, 1 ), i--;
                                        }
                                }
                        }
                }
126 \r
127                 function sendPendingBRs()\r
128                 {\r
129                         while ( pendingBRs.length )\r
130                                 currentNode.add( pendingBRs.shift() );\r
131                 }\r
132 \r
                /*
                * Besides simply appending the specified element to the target, this function
                * also takes care of other dirty work like forcing a block in body, trimming
                * spaces at the block boundaries etc.
                *
                * @param {Element} element  The element to be added as the last child of {@link target}.
                * @param {Element} [target] The parent element to receive the new node. Defaults to
                * the current node, then to the fragment.
                * @param {Boolean} [moveCurrent=false] When the element carries no return point,
                * a true value makes {@link target} the new "currentNode"; otherwise "currentNode"
                * is restored to what it was on entry. A return point on the element always wins.
                 */
                function addElement( element, target, moveCurrent )
                {
                        // Ignore any element that has already been added
                        // ("previous" gets assigned when a node is added to a parent).
                        if ( element.previous !== undefined )
                                return;

                        target = target || currentNode || fragment;

                        // Current element might be mangled by fix body below,
                        // save it for restore later.
                        var savedCurrent = currentNode;

                        // If the target is the fragment and this inline element can't go inside
                        // body (if fixForBody).
                        if ( fixForBody && ( !target.type || target.name == 'body' ) )
                        {
                                // Fake elements are judged by the name of the real element they stand for.
                                var elementName, realElementName;
                                if ( element.attributes
                                         && ( realElementName =
                                                  element.attributes[ 'data-cke-real-element-type' ] ) )
                                        elementName = realElementName;
                                else
                                        elementName =  element.name;

                                if ( elementName && !( elementName in CKEDITOR.dtd.$body || elementName == 'body' || element.isOrphan ) )
                                {
                                        // Create the wrapping block (named by fixForBody) in the fragment.
                                        currentNode = target;
                                        parser.onTagOpen( fixForBody, {} );

                                        // The new target now is that wrapping block; it is also where
                                        // parsing continues after this element is added.
                                        element.returnPoint = target = currentNode;
                                }
                        }

                        // Rtrim empty spaces on block end boundary. (#3585)
                        if ( element._.isBlockLike
                                 && element.name != 'pre' && element.name != 'textarea' )
                        {

                                var length = element.children.length,
                                        lastChild = element.children[ length - 1 ],
                                        text;
                                if ( lastChild && lastChild.type == CKEDITOR.NODE_TEXT )
                                {
                                        // Drop the trailing text node if nothing is left after trimming.
                                        if ( !( text = CKEDITOR.tools.rtrim( lastChild.value ) ) )
                                                element.children.length = length -1;
                                        else
                                                lastChild.value = text;
                                }
                        }

                        target.add( element );

                        // The element is complete now, so leave the space-preserving modes.
                        if ( element.name == 'pre' )
                                inPre = false;

                        if ( element.name == 'textarea' )
                                inTextarea = false;


                        if ( element.returnPoint )
                        {
                                currentNode = element.returnPoint;
                                delete element.returnPoint;
                        }
                        else
                                currentNode = moveCurrent ? target : savedCurrent;
                }
212 \r
                /*
                 * Handler for an opening tag. Creates the element, fixes the tree so that
                 * the element has a valid parent, and makes the element the current node
                 * (or adds it right away when it is an empty element).
                 *
                 * @param {String} tagName The element name.
                 * @param {Object} attributes The element attributes.
                 * @param {Boolean} [selfClosing] Whether the tag was written as self-closing.
                 * @param {Boolean} [optionalClose] Marks the element as one that may be closed
                 * implicitly when a following element does not fit into it.
                 */
                parser.onTagOpen = function( tagName, attributes, selfClosing, optionalClose )
                {
                        var element = new CKEDITOR.htmlParser.element( tagName, attributes );

                        // "isEmpty" will be always "false" for unknown elements, so we
                        // must force it if the parser has identified it as a selfClosing tag.
                        if ( element.isUnknown && selfClosing )
                                element.isEmpty = true;

                        // Check for optional closed elements, including browser quirks and manually opened blocks.
                        element.isOptionalClose = tagName in optionalCloseTags || optionalClose;

                        // This is a tag to be removed if empty, so do not add it immediately.
                        if ( isRemoveEmpty( element ) )
                        {
                                pendingInline.push( element );
                                return;
                        }
                        else if ( tagName == 'pre' )
                                inPre = true;
                        // Inside <pre> a line break is stored as a newline character.
                        else if ( tagName == 'br' && inPre )
                        {
                                currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
                                return;
                        }
                        else if ( tagName == 'textarea' )
                                inTextarea = true;

                        // Other line breaks are queued and only added once more content follows.
                        if ( tagName == 'br' )
                        {
                                pendingBRs.push( element );
                                return;
                        }

                        // Walk up/adjust the tree until the current node accepts the new element.
                        while( 1 )
                        {
                                var currentName = currentNode.name;

                                // Unknown elements borrow the <div> or <span> DTD; the root uses rootDtd.
                                var currentDtd = currentName ? ( CKEDITOR.dtd[ currentName ]
                                                || ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span ) )
                                                : rootDtd;

                                // If the element cannot be child of the current element.
                                if ( !element.isUnknown && !currentNode.isUnknown && !currentDtd[ tagName ] )
                                {
                                        // Current node doesn't have a close tag, time for a close
                                        // as this element isn't fit in. (#7497)
                                        if ( currentNode.isOptionalClose )
                                                parser.onTagClose( currentName );
                                        // Fixing malformed nested lists by moving it into a previous list item. (#3828)
                                        else if ( tagName in listBlocks
                                                && currentName in listBlocks )
                                        {
                                                var children = currentNode.children,
                                                        lastChild = children[ children.length - 1 ];

                                                // Establish the list item if it's not existed.
                                                if ( !( lastChild && lastChild.name == 'li' ) )
                                                        addElement( ( lastChild = new CKEDITOR.htmlParser.element( 'li' ) ), currentNode );

                                                // Remember the outer list so parsing resumes there afterwards.
                                                !element.returnPoint && ( element.returnPoint = currentNode );
                                                currentNode = lastChild;
                                        }
                                        // Establish new list root for orphan list items.
                                        else if ( tagName in CKEDITOR.dtd.$listItem && currentName != tagName )
                                                parser.onTagOpen( tagName == 'li' ? 'ul' : 'dl', {}, 0, 1 );
                                        // We're inside a structural block like table and list, AND the incoming element
                                        // is not of the same type (e.g. <td>td1<td>td2</td>), we simply add this new one before it,
                                        // and most importantly, return back to here once this element is added,
                                        // e.g. <table><tr><td>td1</td><p>p1</p><td>td2</td></tr></table>
                                        else if ( currentName in nonBreakingBlocks && currentName != tagName )
                                        {
                                                !element.returnPoint && ( element.returnPoint = currentNode );
                                                currentNode = currentNode.parent;
                                        }
                                        else
                                        {
                                                // The current element is an inline element, which
                                                // need to be continued even after the close, so put
                                                // it in the pending list.
                                                if ( currentName in CKEDITOR.dtd.$inline )
                                                        pendingInline.unshift( currentNode );

                                                // The most common case where we just need to close the
                                                // current one and append the new one to the parent.
                                                if ( currentNode.parent )
                                                        addElement( currentNode, currentNode.parent, 1 );
                                                // We've tried our best to fix the embarrassment here, while
                                                // this element still doesn't find it's parent, mark it as
                                                // orphan and show our tolerance to it.
                                                else
                                                {
                                                        element.isOrphan = 1;
                                                        break;
                                                }
                                        }
                                }
                                else
                                        break;
                        }

                        // Re-open pending inline elements that may contain this tag, then flush queued <br>s.
                        checkPending( tagName );
                        sendPendingBRs();

                        element.parent = currentNode;

                        // Empty elements are complete already; others collect children until closed.
                        if ( element.isEmpty )
                                addElement( element );
                        else
                                currentNode = element;
                };
324 \r
                /*
                 * Handler for a closing tag. Either cancels a still pending inline element
                 * of that name, or closes the nearest open ancestor with that name together
                 * with everything left open inside of it. A closing tag matching nothing
                 * is ignored.
                 *
                 * @param {String} tagName The name of the element being closed.
                 */
                parser.onTagClose = function( tagName )
                {
                        // Check if there is any pending tag to be closed.
                        for ( var i = pendingInline.length - 1 ; i >= 0 ; i-- )
                        {
                                // If found, just remove it from the list.
                                if ( tagName == pendingInline[ i ].name )
                                {
                                        pendingInline.splice( i, 1 );
                                        return;
                                }
                        }

                        var pendingAdd = [],
                                newPendingInline = [],
                                candidate = currentNode;

                        // Climb from the current node looking for the element being closed.
                        while ( candidate != fragment && candidate.name != tagName )
                        {
                                // If this is an inline element, add it to the pending list, if we're
                                // really closing one of the parents element later, they will continue
                                // after it.
                                if ( !candidate._.isBlockLike )
                                        newPendingInline.unshift( candidate );

                                // This node should be added to it's parent at this point. But,
                                // it should happen only if the closing tag is really closing
                                // one of the nodes. So, for now, we just cache it.
                                pendingAdd.push( candidate );

                                // Make sure return point is properly restored.
                                candidate = candidate.returnPoint || candidate.parent;
                        }

                        // Reaching the fragment means no open element matched the closing tag.
                        if ( candidate != fragment )
                        {
                                // Add all elements that have been found in the above loop.
                                for ( i = 0 ; i < pendingAdd.length ; i++ )
                                {
                                        var node = pendingAdd[ i ];
                                        addElement( node, node.parent );
                                }

                                currentNode = candidate;

                                if ( candidate._.isBlockLike )
                                        sendPendingBRs();

                                addElement( candidate, candidate.parent );

                                // The parent should start receiving new nodes now, except if
                                // addElement changed the currentNode.
                                if ( candidate == currentNode )
                                        currentNode = currentNode.parent;

                                // The inline elements closed on the way are to be continued later.
                                pendingInline = pendingInline.concat( newPendingInline );
                        }

                        // No more block wrapping once the body has been closed.
                        if ( tagName == 'body' )
                                fixForBody = false;
                };
386 \r
387                 parser.onText = function( text )\r
388                 {\r
389                         // Trim empty spaces at beginning of text contents except <pre> and <textarea>.\r
390                         if ( ( !currentNode._.hasInlineStarted || pendingBRs.length ) && !inPre && !inTextarea )\r
391                         {\r
392                                 text = CKEDITOR.tools.ltrim( text );\r
393 \r
394                                 if ( text.length === 0 )\r
395                                         return;\r
396                         }\r
397 \r
398                         var currentName = currentNode.name,\r
399                         currentDtd = currentName ? ( CKEDITOR.dtd[ currentName ]\r
400                                                         || ( currentNode._.isBlockLike ?\r
401                                                                  CKEDITOR.dtd.div : CKEDITOR.dtd.span ) ) : rootDtd;\r
402 \r
403                         // Fix orphan text in list/table. (#8540) (#8870)\r
404                         if ( !inTextarea &&\r
405                                  !currentDtd [ '#' ] &&\r
406                                  currentName in nonBreakingBlocks )\r
407                         {\r
408                                 parser.onTagOpen( currentName in listBlocks ? 'li' :\r
409                                                                   currentName == 'dl' ? 'dd' :\r
410                                                                   currentName == 'table' ? 'tr' :\r
411                                                                   currentName == 'tr' ? 'td' : '' );\r
412                                 parser.onText( text );\r
413                                 return;\r
414                         }\r
415 \r
416                         sendPendingBRs();\r
417                         checkPending();\r
418 \r
419                         if ( fixForBody\r
420                                  && ( !currentNode.type || currentNode.name == 'body' )\r
421                                  && CKEDITOR.tools.trim( text ) )\r
422                         {\r
423                                 this.onTagOpen( fixForBody, {}, 0, 1 );\r
424                         }\r
425 \r
426                         // Shrinking consequential spaces into one single for all elements\r
427                         // text contents.\r
428                         if ( !inPre && !inTextarea )\r
429                                 text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );\r
430 \r
431                         currentNode.add( new CKEDITOR.htmlParser.text( text ) );\r
432                 };\r
433 \r
434                 parser.onCDATA = function( cdata )\r
435                 {\r
436                         currentNode.add( new CKEDITOR.htmlParser.cdata( cdata ) );\r
437                 };\r
438 \r
439                 parser.onComment = function( comment )\r
440                 {\r
441                         sendPendingBRs();\r
442                         checkPending();\r
443                         currentNode.add( new CKEDITOR.htmlParser.comment( comment ) );\r
444                 };\r
445 \r
446                 // Parse it.\r
447                 parser.parse( fragmentHtml );\r
448 \r
449                 // Send all pending BRs except one, which we consider a unwanted bogus. (#5293)\r
450                 sendPendingBRs( !CKEDITOR.env.ie && 1 );\r
451 \r
452                 // Close all pending nodes, make sure return point is properly restored.\r
453                 while ( currentNode != fragment )\r
454                         addElement( currentNode, currentNode.parent, 1 );\r
455 \r
456                 return fragment;\r
457         };\r
458 \r
459         CKEDITOR.htmlParser.fragment.prototype =\r
460         {\r
461                 /**\r
462                  * Adds a node to this fragment.\r
463                  * @param {Object} node The node to be added. It can be any of of the\r
464                  *              following types: {@link CKEDITOR.htmlParser.element},\r
465                  *              {@link CKEDITOR.htmlParser.text} and\r
466                  *              {@link CKEDITOR.htmlParser.comment}.\r
467                  *      @param {Number} [index] From where the insertion happens.\r
468                  * @example\r
469                  */\r
470                 add : function( node, index )\r
471                 {\r
472                         isNaN( index ) && ( index = this.children.length );\r
473 \r
474                         var previous = index > 0 ? this.children[ index - 1 ] : null;\r
475                         if ( previous )\r
476                         {\r
477                                 // If the block to be appended is following text, trim spaces at\r
478                                 // the right of it.\r
479                                 if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )\r
480                                 {\r
481                                         previous.value = CKEDITOR.tools.rtrim( previous.value );\r
482 \r
483                                         // If we have completely cleared the previous node.\r
484                                         if ( previous.value.length === 0 )\r
485                                         {\r
486                                                 // Remove it from the list and add the node again.\r
487                                                 this.children.pop();\r
488                                                 this.add( node );\r
489                                                 return;\r
490                                         }\r
491                                 }\r
492 \r
493                                 previous.next = node;\r
494                         }\r
495 \r
496                         node.previous = previous;\r
497                         node.parent = this;\r
498 \r
499                         this.children.splice( index, 0, node );\r
500 \r
501                         this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );\r
502                 },\r
503 \r
504                 /**\r
505                  * Writes the fragment HTML to a CKEDITOR.htmlWriter.\r
506                  * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.\r
507                  * @example\r
508                  * var writer = new CKEDITOR.htmlWriter();\r
509                  * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '&lt;P&gt;&lt;B&gt;Example' );\r
510                  * fragment.writeHtml( writer )\r
511                  * alert( writer.getHtml() );  "&lt;p&gt;&lt;b&gt;Example&lt;/b&gt;&lt;/p&gt;"\r
512                  */\r
513                 writeHtml : function( writer, filter )\r
514                 {\r
515                         var isChildrenFiltered;\r
516                         this.filterChildren = function()\r
517                         {\r
518                                 var writer = new CKEDITOR.htmlParser.basicWriter();\r
519                                 this.writeChildrenHtml.call( this, writer, filter, true );\r
520                                 var html = writer.getHtml();\r
521                                 this.children = new CKEDITOR.htmlParser.fragment.fromHtml( html ).children;\r
522                                 isChildrenFiltered = 1;\r
523                         };\r
524 \r
525                         // Filtering the root fragment before anything else.\r
526                         !this.name && filter && filter.onFragment( this );\r
527 \r
528                         this.writeChildrenHtml( writer, isChildrenFiltered ? null : filter );\r
529                 },\r
530 \r
531                 writeChildrenHtml : function( writer, filter )\r
532                 {\r
533                         for ( var i = 0 ; i < this.children.length ; i++ )\r
534                                 this.children[i].writeHtml( writer, filter );\r
535                 }\r
536         };\r
537 })();\r