// JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
// 7f6c6df8620ac53b09fd51d66096c3bff6e43f82
// [ckeditor.git] / _source / core / htmlparser / fragment.js
/*
Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
For licensing, see LICENSE.html or http://ckeditor.com/license
*/
5 \r
/**
 * Root container of a lightweight HTML DOM-like tree. It has no name and no
 * parent of its own; it only holds the top-level nodes.
 * @constructor
 * @example
 * var fragment = new CKEDITOR.htmlParser.fragment();
 * alert( fragment.children.length );  // "0"
 */
CKEDITOR.htmlParser.fragment = function()
{
	// Internal state, assembled first and exposed through "this._" below.
	var privateState = {};

	// With no element name to look up, the parser falls back to the <div>
	// content model for nodes flagged as block-like.
	privateState.isBlockLike = true;

	// Becomes true once the last node added is text or an inline element;
	// while false, leading white space of incoming text is trimmed.
	privateState.hasInlineStarted = false;

	/**
	 * The nodes contained in the root of this fragment.
	 * @type Array
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children.length );  // "2"
	 */
	this.children = [];

	/**
	 * The parent of the fragment. A fragment is always a root, so this
	 * should always be null.
	 * @type Object
	 * @default null
	 */
	this.parent = null;

	/** @private */
	this._ = privateState;
};
37 \r
(function()
{
	// Elements whose end tag is marked as optional in the HTML 4.01 DTD
	// (except empty elements).
	var optionalClose = {colgroup:1,dd:1,dt:1,li:1,option:1,p:1,td:1,tfoot:1,th:1,thead:1,tr:1};

	// Block-level elements whose internal structure should be respected during
	// parser fixing: the list/table containers themselves plus everything
	// their DTD entries allow as children.
	var nonBreakingBlocks = CKEDITOR.tools.extend(
			{table:1,ul:1,ol:1,dl:1},
			CKEDITOR.dtd.table, CKEDITOR.dtd.ul, CKEDITOR.dtd.ol, CKEDITOR.dtd.dl );

	/**
	 * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
	 * Elements are attached to their parent only when they get closed
	 * (explicitly, implicitly, or at the end of the input); until then they
	 * are only linked upwards through their "parent" property.
	 * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
	 * @param {String|Boolean} [fixForBody=false] Name of the element (e.g. "p")
	 *		used to wrap root level content that is not allowed directly
	 *		inside body. A falsy value disables the wrapping.
	 * @returns CKEDITOR.htmlParser.fragment The fragment created.
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children[0].name );  // "b"
	 * alert( fragment.children[1].value );  // " Text"
	 */
	CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody )
	{
		var parser = new CKEDITOR.htmlParser(),
			fragment = new CKEDITOR.htmlParser.fragment(),
			// Inline elements waiting to be (re)opened as soon as content
			// that they can hold shows up.
			pendingInline = [],
			// The innermost element still open (the fragment at root level).
			currentNode = fragment,
			// Indicates that we're inside a <pre> element, where white space
			// is handled differently.
			inPre = false,
			// Non breaking block (table, list, ...) to go back to after an
			// element that had to be moved out of it gets closed.
			returnPoint;

		/**
		 * Re-opens, as clones, the pending inline elements which are allowed
		 * inside the current node and, when a tag name is given, which are
		 * able to hold that tag. Each re-opened clone becomes the current
		 * node and is removed from the pending list.
		 * @param {String} [newTagName] Name of the tag about to be opened.
		 */
		function checkPending( newTagName )
		{
			for ( var i = 0 ; i < pendingInline.length ; i++ )
			{
				// The current node changes inside this loop, so its DTD must
				// be looked up again on every iteration.
				var pendingElement = pendingInline[ i ],
					pendingName = pendingElement.name,
					pendingDtd = CKEDITOR.dtd[ pendingName ],
					currentDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

				if ( ( !currentDtd || currentDtd[ pendingName ] ) && ( !newTagName || !pendingDtd || pendingDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
				{
					// Get a clone for the pending element.
					pendingElement = pendingElement.clone();

					// Link it to the current node and make it the current,
					// so the new element will be added inside of it.
					pendingElement.parent = currentNode;
					currentNode = pendingElement;

					// Remove the pending element (back the index by one
					// to properly process the next entry).
					pendingInline.splice( i, 1 );
					i--;
				}
			}
		}

		/**
		 * Attaches a finished element to its target node.
		 * @param {CKEDITOR.htmlParser.element} element The element to attach.
		 * @param {Object} [target] The node receiving the element. Defaults
		 *		to the current node.
		 * @param {Boolean} [enforceCurrent] Whether the current node must be
		 *		restored after a body fixing wrapper has been created.
		 */
		function addElement( element, target, enforceCurrent )
		{
			target = target || currentNode || fragment;

			// If the target is the fragment and this element can't go inside
			// body (if fixForBody).
			if ( fixForBody && !target.type )
			{
				// Placeholder elements are checked by the type of the real
				// element they stand for.
				var elementName = ( element.attributes && element.attributes[ '_cke_real_element_type' ] ) || element.name;

				if ( !( elementName in CKEDITOR.dtd.$body ) )
				{
					var savedCurrent = currentNode;

					// Create the wrapper element in the fragment.
					currentNode = target;
					parser.onTagOpen( fixForBody, {} );

					// The new target now is the wrapper.
					target = currentNode;

					if ( enforceCurrent )
						currentNode = savedCurrent;
				}
			}

			// Rtrim empty spaces on block end boundary. (#3585)
			if ( element._.isBlockLike
				 && element.name != 'pre' )
			{
				var length = element.children.length,
					lastChild = element.children[ length - 1 ],
					text;

				if ( lastChild && lastChild.type == CKEDITOR.NODE_TEXT )
				{
					// A text node made of spaces only is dropped altogether.
					if ( !( text = CKEDITOR.tools.rtrim( lastChild.value ) ) )
						element.children.length = length - 1;
					else
						lastChild.value = text;
				}
			}

			target.add( element );

			// The element had been moved out of a non breaking block: go
			// back to that block now.
			if ( element.returnPoint )
			{
				currentNode = element.returnPoint;
				delete element.returnPoint;
			}
		}

		/**
		 * Parser callback for start tags.
		 * @param {String} tagName The name of the tag.
		 * @param {Object} attributes The attributes of the tag.
		 * @param {Boolean} selfClosing Whether the tag is written as "<tag />".
		 */
		parser.onTagOpen = function( tagName, attributes, selfClosing )
		{
			var element = new CKEDITOR.htmlParser.element( tagName, attributes );

			// "isEmpty" will be always "false" for unknown elements, so we
			// must force it if the parser has identified it as a selfClosing tag.
			if ( element.isUnknown && selfClosing )
				element.isEmpty = true;

			// This is a tag to be removed if empty, so do not add it immediately.
			if ( CKEDITOR.dtd.$removeEmpty[ tagName ] )
			{
				pendingInline.push( element );
				return;
			}
			else if ( tagName == 'pre' )
				inPre = true;
			else if ( tagName == 'br' && inPre )
			{
				// Line breaks inside <pre> are stored as plain new lines.
				currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
				return;
			}

			// Nodes without a DTD entry (the fragment, unknown elements) are
			// checked against <div> or <span>, depending on their nature.
			var currentName = currentNode.name,
				currentDtd = ( currentName && CKEDITOR.dtd[ currentName ] ) || ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span );

			// If the element cannot be child of the current element.
			if ( !element.isUnknown && !currentNode.isUnknown && !currentDtd[ tagName ] )
			{
				// If this is the fragment node, just ignore this tag and add
				// its children.
				if ( !currentName )
					return;

				var reApply = false;

				// If the element name is the same as the current element name,
				// then just close the current one and append the new one to the
				// parent. This situation usually happens with <p>, <li>, <dt> and
				// <dd>, specially in IE.
				if ( tagName == currentName )
				{
					addElement( currentNode, currentNode.parent );
				}
				else
				{
					if ( nonBreakingBlocks[ currentName ] )
					{
						// Keep the block open; just remember where to come
						// back once the misplaced element is closed.
						if ( !returnPoint )
							returnPoint = currentNode;
					}
					else
					{
						addElement( currentNode, currentNode.parent, true );

						if ( !optionalClose[ currentName ] )
						{
							// The current element is an inline element, which
							// cannot hold the new one. Put it in the pending list,
							// and try adding the new one after it.
							pendingInline.unshift( currentNode );
						}
					}

					reApply = true;
				}

				// In any of the above cases, we'll be adding, or trying to
				// add it to the parent.
				currentNode = currentNode.returnPoint || currentNode.parent;

				if ( reApply )
				{
					// Try again, now one level up.
					parser.onTagOpen.apply( this, arguments );
					return;
				}
			}

			checkPending( tagName );

			element.parent = currentNode;
			element.returnPoint = returnPoint;
			returnPoint = 0;

			// Empty elements are complete right away; any other element
			// starts receiving the nodes that follow.
			if ( element.isEmpty )
				addElement( element );
			else
				currentNode = element;
		};

		/**
		 * Parser callback for end tags.
		 * @param {String} tagName The name of the tag being closed.
		 */
		parser.onTagClose = function( tagName )
		{
			var index = 0,
				pendingAdd = [],
				candidate = currentNode;

			// Walk up the open elements, looking for the one being closed.
			while ( candidate.type && candidate.name != tagName )
			{
				// If this is an inline element, add it to the pending list, so
				// it will continue after the closing tag.
				if ( !candidate._.isBlockLike )
				{
					pendingInline.unshift( candidate );

					// Increase the index, so it will not get checked again in
					// the pending list check that follows.
					index++;
				}

				// This node should be added to its parent at this point. But,
				// it should happen only if the closing tag is really closing
				// one of the nodes. So, for now, we just cache it.
				pendingAdd.push( candidate );

				candidate = candidate.parent;
			}

			if ( candidate.type )
			{
				// Add all elements that have been found in the above loop.
				for ( var i = 0 ; i < pendingAdd.length ; i++ )
				{
					var node = pendingAdd[ i ];

					// An unclosed <pre> which is implicitly closed here (and
					// which has not been queued for re-opening above) ends the
					// preformatted context. Otherwise all following text would
					// still be handled as <pre> contents.
					if ( node.name == 'pre' && node._.isBlockLike )
						inPre = false;

					addElement( node, node.parent );
				}

				currentNode = candidate;

				if ( currentNode.name == 'pre' )
					inPre = false;

				addElement( candidate, candidate.parent );

				// The parent should start receiving new nodes now, except if
				// addElement changed the currentNode.
				if ( candidate == currentNode )
					currentNode = currentNode.parent;
			}
			// The tag is not actually closing anything, thus we need to
			// invalidate the pending elements. (#3862)
			else
			{
				pendingInline.splice( 0, index );
				index = 0;
			}

			// Check if there is any pending tag to be closed.
			for ( ; index < pendingInline.length ; index++ )
			{
				// If found, just remove it from the list.
				if ( tagName == pendingInline[ index ].name )
				{
					pendingInline.splice( index, 1 );

					// Decrease the index so we continue from the next one.
					index--;
				}
			}
		};

		/**
		 * Parser callback for text.
		 * @param {String} text The text found.
		 */
		parser.onText = function( text )
		{
			// Trim empty spaces at beginning of element contents except <pre>.
			if ( !currentNode._.hasInlineStarted && !inPre )
			{
				text = CKEDITOR.tools.ltrim( text );

				if ( text.length === 0 )
					return;
			}

			checkPending();

			// Root level text must be wrapped when fixing for body.
			if ( fixForBody && !currentNode.type )
				this.onTagOpen( fixForBody, {} );

			// Shrink consecutive white space into one single space for all
			// elements text contents.
			if ( !inPre )
				text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );

			currentNode.add( new CKEDITOR.htmlParser.text( text ) );
		};

		/**
		 * Parser callback for CDATA sections.
		 * @param {String} cdata The CDATA contents.
		 */
		parser.onCDATA = function( cdata )
		{
			currentNode.add( new CKEDITOR.htmlParser.cdata( cdata ) );
		};

		/**
		 * Parser callback for comments.
		 * @param {String} comment The comment text.
		 */
		parser.onComment = function( comment )
		{
			currentNode.add( new CKEDITOR.htmlParser.comment( comment ) );
		};

		// Parse it.
		parser.parse( fragmentHtml );

		// Close all pending nodes.
		while ( currentNode.type )
		{
			var parent = currentNode.parent,
				node = currentNode;

			// Root level elements not allowed in body get wrapped here too.
			if ( fixForBody && !parent.type && !CKEDITOR.dtd.$body[ node.name ] )
			{
				currentNode = parent;
				parser.onTagOpen( fixForBody, {} );
				parent = currentNode;
			}

			parent.add( node );
			currentNode = parent;
		}

		return fragment;
	};

	CKEDITOR.htmlParser.fragment.prototype =
	{
		/**
		 * Adds a node to the end of this fragment, linking it to its previous
		 * sibling and setting this fragment as its parent.
		 * @param {Object} node The node to be added. It can be any of the
		 *		following types: {@link CKEDITOR.htmlParser.element},
		 *		{@link CKEDITOR.htmlParser.text},
		 *		{@link CKEDITOR.htmlParser.cdata} and
		 *		{@link CKEDITOR.htmlParser.comment}.
		 * @example
		 * var fragment = new CKEDITOR.htmlParser.fragment();
		 * fragment.add( new CKEDITOR.htmlParser.text( 'Sample' ) );
		 * alert( fragment.children.length );  // "1"
		 */
		add : function( node )
		{
			var len = this.children.length,
				previous = len > 0 && this.children[ len - 1 ] || null;

			if ( previous )
			{
				// If the block to be appended is following text, trim spaces at
				// the right of it.
				if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
				{
					previous.value = CKEDITOR.tools.rtrim( previous.value );

					// If we have completely cleared the previous node.
					if ( previous.value.length === 0 )
					{
						// Remove it from the list and add the node again, so
						// it gets checked against the new last child.
						this.children.pop();
						this.add( node );
						return;
					}
				}

				previous.next = node;
			}

			node.previous = previous;
			node.parent = this;

			this.children.push( node );

			// Inline contents are considered started when the last node
			// added is either text or a non block-like element.
			this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
		},

		/**
		 * Writes the fragment HTML to a CKEDITOR.htmlWriter, by asking each
		 * child node, in order, to write itself.
		 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
		 * @param {Object} [filter] Passed through untouched to the writeHtml
		 *		function of every child node.
		 * @example
		 * var writer = new CKEDITOR.htmlWriter();
		 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '&lt;P&gt;&lt;B&gt;Example' );
		 * fragment.writeHtml( writer )
		 * alert( writer.getHtml() );  // "&lt;p&gt;&lt;b&gt;Example&lt;/b&gt;&lt;/p&gt;"
		 */
		writeHtml : function( writer, filter )
		{
			for ( var i = 0, len = this.children.length ; i < len ; i++ )
				this.children[i].writeHtml( writer, filter );
		}
	};
})();