JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
f94f4e917d242c61b6e99e942495f852abae6868
[ckeditor.git] / _source / core / htmlparser / fragment.js
1 /*\r
2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.\r
3 For licensing, see LICENSE.html or http://ckeditor.com/license\r
4 */\r
5 \r
/**
 * A lightweight representation of an HTML DOM structure. A fragment is the
 * root container receiving the top-level nodes created while parsing HTML.
 * @constructor
 * @example
 * var fragment = new CKEDITOR.htmlParser.fragment();
 * alert( fragment.children.length );  "0"
 */
CKEDITOR.htmlParser.fragment = function()
{
        // Internal parsing flags, exposed through the private "_" namespace.
        var internals = {};

        // The fragment root is handled like a block element.
        internals.isBlockLike = true;

        // Initially false; recomputed by add() each time a node is appended.
        internals.hasInlineStarted = false;

        /**
         * The nodes contained in the root of this fragment.
         * @type Array
         * @example
         * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
         * alert( fragment.children.length );  "2"
         */
        this.children = [];

        /**
         * The fragment parent. A fragment is a root, so this is initialized
         * to null.
         * @type Object
         * @default null
         */
        this.parent = null;

        /** @private */
        this._ = internals;
};
37 \r
38 (function()\r
39 {\r
// Elements whose end tag is marked as optional in the HTML 4.01 DTD
// (except empty elements).
var optionalClose = {colgroup:1,dd:1,dt:1,li:1,option:1,p:1,td:1,tfoot:1,th:1,thead:1,tr:1};

// Block-level elements whose internal structure should be respected
// during parser fixing: the table and list containers themselves,
// extended (through CKEDITOR.tools.extend) with the DTD maps of
// table, ul, ol and dl.
var structuralContainers = {table:1,ul:1,ol:1,dl:1};
var nonBreakingBlocks = CKEDITOR.tools.extend(
                structuralContainers,
                CKEDITOR.dtd.table, CKEDITOR.dtd.ul, CKEDITOR.dtd.ol, CKEDITOR.dtd.dl );

// Shortcuts to the DTD maps of list containers and list items.
var listBlocks = CKEDITOR.dtd.$list;
var listItems = CKEDITOR.dtd.$listItem;
50 \r
/**
 * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
 * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
 * @param {String|Boolean} [fixForBody=false] Name of the element (e.g. 'p')
 *              used to wrap inline content found directly at the root/body
 *              level. A falsy value disables the wrapping.
 * @returns {CKEDITOR.htmlParser.fragment} The fragment created.
 * @example
 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
 * alert( fragment.children[0].name );  "b"
 * alert( fragment.children[1].value );  " Text"
 */
CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody )
{
        var parser = new CKEDITOR.htmlParser(),
                fragment = new CKEDITOR.htmlParser.fragment(),
                // Opened "remove if empty" / interrupted inline elements that
                // have not been attached to the tree yet.
                pendingInline = [],
                // <br> elements held back until other content follows them.
                pendingBRs = [],
                // The node currently receiving new children.
                currentNode = fragment,
                // Indicates we're inside a <pre> element, where spaces should
                // be handled differently.
                inPre = false,
                // Node to go back to once a misplaced element gets closed.
                returnPoint;

        /**
         * Opens (as clones) the pending inline elements that are allowed in
         * the current node and that may hold what is about to be added.
         * @param {String} [newTagName] Name of the element about to be added.
         *              Omitted when text or a comment is being added.
         */
        function checkPending( newTagName )
        {
                var pendingBRsSent = false;

                for ( var i = 0 ; i < pendingInline.length ; i++ )
                {
                        var pendingElement = pendingInline[ i ],
                                pendingName = pendingElement.name,
                                pendingDtd = CKEDITOR.dtd[ pendingName ],
                                // Must be re-read on each iteration: currentNode
                                // changes whenever a pending element is opened.
                                currentDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

                        if ( ( !currentDtd || currentDtd[ pendingName ] ) && ( !newTagName || !pendingDtd || pendingDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
                        {
                                // Pending BRs precede the pending inline elements,
                                // so flush them once before opening the first one.
                                if ( !pendingBRsSent )
                                {
                                        sendPendingBRs();
                                        pendingBRsSent = true;
                                }

                                // Get a clone for the pending element.
                                pendingElement = pendingElement.clone();

                                // Add it to the current node and make it the current,
                                // so the new element will be added inside of it.
                                pendingElement.parent = currentNode;
                                currentNode = pendingElement;

                                // Remove the pending element (move the index back by
                                // one to properly process the next entry).
                                pendingInline.splice( i, 1 );
                                i--;
                        }
                }
        }

        /**
         * Adds the pending <br> elements to the current node, oldest first.
         * @param {Number} [brsToIgnore=0] Number of BRs to leave in the queue.
         */
        function sendPendingBRs( brsToIgnore )
        {
                while ( pendingBRs.length - ( brsToIgnore || 0 ) > 0 )
                        currentNode.add( pendingBRs.shift() );
        }

        /**
         * Attaches a finished element to its target node.
         * @param {CKEDITOR.htmlParser.element} element The element to attach.
         * @param {Object} [target] Node receiving the element. Defaults to
         *              the current node.
         * @param {Boolean} [enforceCurrent] Restore the current node after a
         *              body-fixing wrapper had to be created.
         */
        function addElement( element, target, enforceCurrent )
        {
                target = target || currentNode || fragment;

                // If the target is the fragment and this inline element can't go
                // inside body (if fixForBody).
                if ( fixForBody && !target.type )
                {
                        var elementName, realElementName;
                        if ( element.attributes
                                 && ( realElementName =
                                          element.attributes[ 'data-cke-real-element-type' ] ) )
                                elementName = realElementName;
                        else
                                elementName = element.name;

                        if ( elementName && elementName in CKEDITOR.dtd.$inline )
                        {
                                var savedCurrent = currentNode;

                                // Create the wrapper (e.g. <p>) in the fragment.
                                currentNode = target;
                                parser.onTagOpen( fixForBody, {} );

                                // The new target now is the wrapper.
                                target = currentNode;

                                if ( enforceCurrent )
                                        currentNode = savedCurrent;
                        }
                }

                // Rtrim empty spaces on block end boundary. (#3585)
                if ( element._.isBlockLike
                         && element.name != 'pre' )
                {
                        var length = element.children.length,
                                lastChild = element.children[ length - 1 ],
                                text;

                        if ( lastChild && lastChild.type == CKEDITOR.NODE_TEXT )
                        {
                                // Drop the trailing text node when nothing is left
                                // of it after trimming.
                                if ( !( text = CKEDITOR.tools.rtrim( lastChild.value ) ) )
                                        element.children.pop();
                                else
                                        lastChild.value = text;
                        }
                }

                target.add( element );

                // A misplaced element remembers where parsing must resume.
                if ( element.returnPoint )
                {
                        currentNode = element.returnPoint;
                        delete element.returnPoint;
                }
        }

        /**
         * Parser callback for start tags. Creates the element and fixes the
         * structure when the element is not allowed in the current node.
         */
        parser.onTagOpen = function( tagName, attributes, selfClosing )
        {
                var element = new CKEDITOR.htmlParser.element( tagName, attributes );

                // "isEmpty" will be always "false" for unknown elements, so we
                // must force it if the parser has identified it as a selfClosing tag.
                if ( element.isUnknown && selfClosing )
                        element.isEmpty = true;

                // This is a tag to be removed if empty, so do not add it immediately.
                if ( CKEDITOR.dtd.$removeEmpty[ tagName ] )
                {
                        pendingInline.push( element );
                        return;
                }
                else if ( tagName == 'pre' )
                        inPre = true;
                else if ( tagName == 'br' && inPre )
                {
                        // Inside <pre>, a <br> is represented by a line break.
                        currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
                        return;
                }

                if ( tagName == 'br' )
                {
                        pendingBRs.push( element );
                        return;
                }

                var currentName = currentNode.name;

                // Unknown current elements fall back to the <div> or <span> DTD.
                var currentDtd = currentName
                        && ( CKEDITOR.dtd[ currentName ]
                                || ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span ) );

                // If the element cannot be child of the current element.
                if ( currentDtd   // Fragment could receive any elements.
                         && !element.isUnknown && !currentNode.isUnknown && !currentDtd[ tagName ] )
                {
                        var reApply = false,
                                addPoint;   // New position to start adding nodes.

                        // Fixing malformed nested lists by moving it into a previous list item. (#3828)
                        if ( tagName in listBlocks
                                && currentName in listBlocks )
                        {
                                var children = currentNode.children,
                                        lastChild = children[ children.length - 1 ];

                                // Establish the list item if it doesn't exist yet.
                                if ( !( lastChild && lastChild.name in listItems ) )
                                        addElement( ( lastChild = new CKEDITOR.htmlParser.element( 'li' ) ), currentNode );

                                returnPoint = currentNode;
                                addPoint = lastChild;
                        }
                        // If the element name is the same as the current element name,
                        // then just close the current one and append the new one to the
                        // parent. This situation usually happens with <p>, <li>, <dt> and
                        // <dd>, specially in IE. No re-dispatch is needed in this case.
                        else if ( tagName == currentName )
                        {
                                addElement( currentNode, currentNode.parent );
                        }
                        // A list item outside of a list: open an <ul> to hold it.
                        else if ( tagName in listItems )
                        {
                                parser.onTagOpen( 'ul', {} );
                                addPoint = currentNode;
                                reApply = true;
                        }
                        else
                        {
                                if ( nonBreakingBlocks[ currentName ] )
                                {
                                        if ( !returnPoint )
                                                returnPoint = currentNode;
                                }
                                else
                                {
                                        addElement( currentNode, currentNode.parent, true );

                                        if ( !optionalClose[ currentName ] )
                                        {
                                                // The current element is an inline element, which
                                                // cannot hold the new one. Put it in the pending list,
                                                // and try adding the new one after it.
                                                pendingInline.unshift( currentNode );
                                        }
                                }

                                reApply = true;
                        }

                        if ( addPoint )
                                currentNode = addPoint;
                        // Try adding it to the return point, or the parent element.
                        else
                                currentNode = currentNode.returnPoint || currentNode.parent;

                        if ( reApply )
                        {
                                // Process the same tag again from the new position.
                                parser.onTagOpen( tagName, attributes, selfClosing );
                                return;
                        }
                }

                checkPending( tagName );
                sendPendingBRs();

                element.parent = currentNode;
                element.returnPoint = returnPoint;
                returnPoint = 0;

                if ( element.isEmpty )
                        addElement( element );
                else
                        currentNode = element;
        };

        /**
         * Parser callback for end tags. Closes the matching open element,
         * if any, together with the elements still open inside of it.
         */
        parser.onTagClose = function( tagName )
        {
                // Check if there is any pending tag to be closed.
                for ( var i = pendingInline.length - 1 ; i >= 0 ; i-- )
                {
                        // If found, just remove it from the list.
                        if ( tagName == pendingInline[ i ].name )
                        {
                                pendingInline.splice( i, 1 );
                                return;
                        }
                }

                var pendingAdd = [],
                        newPendingInline = [],
                        candidate = currentNode;

                // Walk up until the element being closed (or the fragment) is found.
                while ( candidate.type && candidate.name != tagName )
                {
                        // If this is an inline element, add it to the pending list. If we're
                        // really closing one of its parent elements later, it will continue
                        // after that parent.
                        if ( !candidate._.isBlockLike )
                                newPendingInline.unshift( candidate );

                        // This node should be added to its parent at this point. But
                        // it should happen only if the closing tag is really closing
                        // one of the nodes. So, for now, we just cache it.
                        pendingAdd.push( candidate );

                        candidate = candidate.parent;
                }

                // The fragment has no type: reaching it means nothing matched
                // the closing tag, which is then ignored.
                if ( candidate.type )
                {
                        // Add all elements that have been found in the above loop.
                        for ( i = 0 ; i < pendingAdd.length ; i++ )
                        {
                                var node = pendingAdd[ i ];
                                addElement( node, node.parent );
                        }

                        currentNode = candidate;

                        if ( currentNode.name == 'pre' )
                                inPre = false;

                        if ( candidate._.isBlockLike )
                                sendPendingBRs();

                        addElement( candidate, candidate.parent );

                        // The parent should start receiving new nodes now, except if
                        // addElement changed the currentNode.
                        if ( candidate == currentNode )
                                currentNode = currentNode.parent;

                        pendingInline = pendingInline.concat( newPendingInline );
                }

                // Once <body> is closed, no more body fixing is applied.
                if ( tagName == 'body' )
                        fixForBody = false;
        };

        /**
         * Parser callback for text. Normalizes white-spaces (outside <pre>)
         * and adds a text node to the current node.
         */
        parser.onText = function( text )
        {
                // Trim empty spaces at beginning of element contents except <pre>.
                if ( !currentNode._.hasInlineStarted && !inPre )
                {
                        text = CKEDITOR.tools.ltrim( text );

                        if ( text.length === 0 )
                                return;
                }

                sendPendingBRs();
                checkPending();

                // Non-empty text directly at root/body level gets wrapped.
                if ( fixForBody
                         && ( !currentNode.type || currentNode.name == 'body' )
                         && CKEDITOR.tools.trim( text ) )
                {
                        parser.onTagOpen( fixForBody, {} );
                }

                // Shrink consecutive white-spaces into a single space for all
                // elements' text contents.
                if ( !inPre )
                        text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );

                currentNode.add( new CKEDITOR.htmlParser.text( text ) );
        };

        /**
         * Parser callback for CDATA. Added as is to the current node.
         */
        parser.onCDATA = function( cdata )
        {
                currentNode.add( new CKEDITOR.htmlParser.cdata( cdata ) );
        };

        /**
         * Parser callback for comments.
         */
        parser.onComment = function( comment )
        {
                sendPendingBRs();
                checkPending();
                currentNode.add( new CKEDITOR.htmlParser.comment( comment ) );
        };

        // Parse it.
        parser.parse( fragmentHtml );

        // Send all pending BRs except one (outside IE), which we consider an
        // unwanted bogus. (#5293)
        sendPendingBRs( !CKEDITOR.env.ie && 1 );

        // Close all pending nodes.
        while ( currentNode.type )
        {
                var parent = currentNode.parent,
                        node = currentNode;

                // A node not allowed directly in body gets wrapped first.
                if ( fixForBody
                         && ( !parent.type || parent.name == 'body' )
                         && !CKEDITOR.dtd.$body[ node.name ] )
                {
                        currentNode = parent;
                        parser.onTagOpen( fixForBody, {} );
                        parent = currentNode;
                }

                parent.add( node );
                currentNode = parent;
        }

        return fragment;
};
426 \r
CKEDITOR.htmlParser.fragment.prototype =
{
        /**
         * Adds a node to this fragment.
         * @param {Object} node The node to be added. It can be any of the
         *              following types: {@link CKEDITOR.htmlParser.element},
         *              {@link CKEDITOR.htmlParser.text} and
         *              {@link CKEDITOR.htmlParser.comment}.
         * @example
         */
        add : function( node )
        {
                var len = this.children.length,
                        previous = len > 0 && this.children[ len - 1 ] || null;

                if ( previous )
                {
                        // If the block to be appended is following text, trim spaces at
                        // the right of it.
                        if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
                        {
                                previous.value = CKEDITOR.tools.rtrim( previous.value );

                                // If we have completely cleared the previous node.
                                if ( previous.value.length === 0 )
                                {
                                        // Remove it from the list and add the node again, so
                                        // it gets linked to the node that is now the last one.
                                        this.children.pop();
                                        this.add( node );
                                        return;
                                }
                        }

                        previous.next = node;
                }

                node.previous = previous;
                node.parent = this;

                this.children.push( node );

                // Inline content has started if the last added node is text or
                // a non block-like element.
                this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
        },

        /**
         * Writes the fragment HTML to a CKEDITOR.htmlWriter.
         * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
         * @param {Object} [filter] Filter applied while writing. Its
         *              onFragment() method is called with the root fragment
         *              before any child is written.
         * @example
         * var writer = new CKEDITOR.htmlWriter();
         * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '&lt;P&gt;&lt;B&gt;Example' );
         * fragment.writeHtml( writer )
         * alert( writer.getHtml() );  "&lt;p&gt;&lt;b&gt;Example&lt;/b&gt;&lt;/p&gt;"
         */
        writeHtml : function( writer, filter )
        {
                var isChildrenFiltered;

                // Made available on the node so the children can be filtered
                // ahead of writing: they get serialized with the filter
                // applied, then re-parsed to replace the current children.
                this.filterChildren = function()
                {
                        var writer = new CKEDITOR.htmlParser.basicWriter();
                        this.writeChildrenHtml( writer, filter, true );
                        var html = writer.getHtml();
                        // fromHtml() is a factory returning the fragment, no "new" needed.
                        this.children = CKEDITOR.htmlParser.fragment.fromHtml( html ).children;
                        isChildrenFiltered = 1;
                };

                // Filtering the root fragment before anything else.
                if ( !this.name && filter )
                        filter.onFragment( this );

                // Children already filtered must not go through the filter again.
                this.writeChildrenHtml( writer, isChildrenFiltered ? null : filter );
        },

        /**
         * Writes the HTML of all children, in order, to the writer.
         * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
         * @param {Object} [filter] Filter passed along to each child's
         *              writeHtml().
         */
        writeChildrenHtml : function( writer, filter )
        {
                for ( var i = 0 ; i < this.children.length ; i++ )
                        this.children[i].writeHtml( writer, filter );
        }
};
504 })();\r