1/*
2  pprint.c -- pretty print parse tree
3
4  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
5  See tidy.h for the copyright notice.
6
7  CVS Info :
8
9    $Author: iccir $
10    $Date: 2007/02/08 06:24:33 $
11    $Revision: 1.4 $
12
13*/
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18
19#include "pprint.h"
20#include "tidy-int.h"
21#include "parser.h"
22#include "entities.h"
23#include "tmbstr.h"
24#include "utf8.h"
25
26/*
27  Block-level and unknown elements are printed on
28  new lines and their contents indented 2 spaces
29
30  Inline elements are printed inline.
31
  Inline content is wrapped on spaces (except in
  attribute values or preformatted text), after
  start tags and before end tags.
35*/
36
37static void PPrintAsp( TidyDocImpl* doc, uint indent, Node* node );
38static void PPrintJste( TidyDocImpl* doc, uint indent, Node* node );
39static void PPrintPhp( TidyDocImpl* doc, uint indent, Node* node );
40static int  TextEndsWithNewline( Lexer *lexer, Node *node, uint mode );
41static int  TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode );
42static Bool InsideHead( TidyDocImpl* doc, Node *node );
43static Bool ShouldIndent( TidyDocImpl* doc, Node *node );
44
45#if SUPPORT_ASIAN_ENCODINGS
46/* #431953 - start RJ Wraplen adjusted for smooth international ride */
47
48#if 0
49uint CWrapLen( TidyDocImpl* doc, uint ind )
50{
51    ctmbstr lang = cfgStr( doc, TidyLanguage );
52    uint wraplen = cfg( doc, TidyWrapLen );
53
54    if ( !TY_(tmbstrcasecmp)(lang, "zh") )
55        /* Chinese characters take two positions on a fixed-width screen */
56        /* It would be more accurate to keep a parallel linelen and wraphere
57           incremented by 2 for Chinese characters and 1 otherwise, but this
58           is way simpler.
59        */
60        return (ind + (( wraplen - ind ) / 2)) ;
61
62    if ( !TY_(tmbstrcasecmp)(lang, "ja") )
63        /* average Japanese text is 30% kanji */
64        return (ind + ((( wraplen - ind ) * 7) / 10)) ;
65
66    return wraplen;
67}
68#endif
69
/*
  Unicode general categories that matter when looking for a
  wrap point.  The enumerators keep their implicit values 0..10.
*/
typedef enum
{
    UC00 = 0,   /* None                       */

    /* punctuation */
    UCPC,       /* Punctuation, Connector     */
    UCPD,       /* Punctuation, Dash          */
    UCPE,       /* Punctuation, Close         */
    UCPS,       /* Punctuation, Open          */
    UCPI,       /* Punctuation, Initial quote */
    UCPF,       /* Punctuation, Final quote   */
    UCPO,       /* Punctuation, Other         */

    /* separators */
    UCZS,       /* Separator, Space           */
    UCZL,       /* Separator, Line            */
    UCZP        /* Separator, Paragraph       */
} UnicodeCategory;
84
85/*
86  From the original code, the following characters are removed:
87
88    U+2011 (non-breaking hyphen)
89    U+202F (narrow non-break space)
90    U+2044 (fraction slash)
91    U+200B (zero width space)
92    ...... (bidi formatting control characters)
93
  U+2011 and U+202F are non-breaking; U+2044 is an Sm character;
  U+200B is a non-visible space, and wrapping after it would make
  this space visible; bidi should be done using HTML features,
  and the bidi control characters are neither Px nor Zx.
98
99  The following Unicode 3.0 punctuation characters are added:
100
101    U+2048 (question exclamation mark)
102    U+2049 (exclamation question mark)
103    U+204A (tironian sign et)
104    U+204B (reversed pilcrow sign)
105    U+204C (black leftwards bullet)
106    U+204D (black rightwards bullet)
107    U+3030 (wavy dash)
108    U+30FB (katakana middle dot)
109    U+FE63 (small hyphen-minus)
110    U+FE68 (small reverse solidus)
111    U+FF3F (fullwidth low line)
112    U+FF5B (fullwidth left curly bracket)
113    U+FF5D (fullwidth right curly bracket)
114
115  Other additional characters were not included in Unicode 3.0.
116  The table is based on Unicode 4.0. It must include only those
117  characters marking a wrapping point, "before" if the general
118  category is UCPS or UCPI, otherwise "after".
119*/
120static struct _unicode4cat
121{
122  unsigned long code;
123  UnicodeCategory category;
124} const unicode4cat[] =
125{
126#if 0
127  { 0x037E, UCPO }, { 0x0387, UCPO }, { 0x055A, UCPO }, { 0x055B, UCPO },
128  { 0x055C, UCPO }, { 0x055D, UCPO }, { 0x055E, UCPO }, { 0x055F, UCPO },
129  { 0x0589, UCPO }, { 0x058A, UCPD }, { 0x05BE, UCPO }, { 0x05C0, UCPO },
130  { 0x05C3, UCPO }, { 0x05F3, UCPO }, { 0x05F4, UCPO }, { 0x060C, UCPO },
131  { 0x060D, UCPO }, { 0x061B, UCPO }, { 0x061F, UCPO }, { 0x066A, UCPO },
132  { 0x066B, UCPO }, { 0x066C, UCPO }, { 0x066D, UCPO }, { 0x06D4, UCPO },
133  { 0x0700, UCPO }, { 0x0701, UCPO }, { 0x0702, UCPO }, { 0x0703, UCPO },
134  { 0x0704, UCPO }, { 0x0705, UCPO }, { 0x0706, UCPO }, { 0x0707, UCPO },
135  { 0x0708, UCPO }, { 0x0709, UCPO }, { 0x070A, UCPO }, { 0x070B, UCPO },
136  { 0x070C, UCPO }, { 0x070D, UCPO }, { 0x0964, UCPO }, { 0x0965, UCPO },
137  { 0x0970, UCPO }, { 0x0DF4, UCPO }, { 0x0E4F, UCPO }, { 0x0E5A, UCPO },
138  { 0x0E5B, UCPO }, { 0x0F04, UCPO }, { 0x0F05, UCPO }, { 0x0F06, UCPO },
139  { 0x0F07, UCPO }, { 0x0F08, UCPO }, { 0x0F09, UCPO }, { 0x0F0A, UCPO },
140  { 0x0F0B, UCPO }, { 0x0F0D, UCPO }, { 0x0F0E, UCPO }, { 0x0F0F, UCPO },
141  { 0x0F10, UCPO }, { 0x0F11, UCPO }, { 0x0F12, UCPO }, { 0x0F3A, UCPS },
142  { 0x0F3B, UCPE }, { 0x0F3C, UCPS }, { 0x0F3D, UCPE }, { 0x0F85, UCPO },
143  { 0x104A, UCPO }, { 0x104B, UCPO }, { 0x104C, UCPO }, { 0x104D, UCPO },
144  { 0x104E, UCPO }, { 0x104F, UCPO }, { 0x10FB, UCPO }, { 0x1361, UCPO },
145  { 0x1362, UCPO }, { 0x1363, UCPO }, { 0x1364, UCPO }, { 0x1365, UCPO },
146  { 0x1366, UCPO }, { 0x1367, UCPO }, { 0x1368, UCPO }, { 0x166D, UCPO },
147  { 0x166E, UCPO }, { 0x1680, UCZS }, { 0x169B, UCPS }, { 0x169C, UCPE },
148  { 0x16EB, UCPO }, { 0x16EC, UCPO }, { 0x16ED, UCPO }, { 0x1735, UCPO },
149  { 0x1736, UCPO }, { 0x17D4, UCPO }, { 0x17D5, UCPO }, { 0x17D6, UCPO },
150  { 0x17D8, UCPO }, { 0x17D9, UCPO }, { 0x17DA, UCPO }, { 0x1800, UCPO },
151  { 0x1801, UCPO }, { 0x1802, UCPO }, { 0x1803, UCPO }, { 0x1804, UCPO },
152  { 0x1805, UCPO }, { 0x1806, UCPD }, { 0x1807, UCPO }, { 0x1808, UCPO },
153  { 0x1809, UCPO }, { 0x180A, UCPO }, { 0x180E, UCZS }, { 0x1944, UCPO },
154  { 0x1945, UCPO },
155#endif
156  { 0x2000, UCZS }, { 0x2001, UCZS }, { 0x2002, UCZS }, { 0x2003, UCZS },
157  { 0x2004, UCZS }, { 0x2005, UCZS }, { 0x2006, UCZS }, { 0x2008, UCZS },
158  { 0x2009, UCZS }, { 0x200A, UCZS }, { 0x2010, UCPD }, { 0x2012, UCPD },
159  { 0x2013, UCPD }, { 0x2014, UCPD }, { 0x2015, UCPD }, { 0x2016, UCPO },
160  { 0x2017, UCPO }, { 0x2018, UCPI }, { 0x2019, UCPF }, { 0x201A, UCPS },
161  { 0x201B, UCPI }, { 0x201C, UCPI }, { 0x201D, UCPF }, { 0x201E, UCPS },
162  { 0x201F, UCPI }, { 0x2020, UCPO }, { 0x2021, UCPO }, { 0x2022, UCPO },
163  { 0x2023, UCPO }, { 0x2024, UCPO }, { 0x2025, UCPO }, { 0x2026, UCPO },
164  { 0x2027, UCPO }, { 0x2028, UCZL }, { 0x2029, UCZP }, { 0x2030, UCPO },
165  { 0x2031, UCPO }, { 0x2032, UCPO }, { 0x2033, UCPO }, { 0x2034, UCPO },
166  { 0x2035, UCPO }, { 0x2036, UCPO }, { 0x2037, UCPO }, { 0x2038, UCPO },
167  { 0x2039, UCPI }, { 0x203A, UCPF }, { 0x203B, UCPO }, { 0x203C, UCPO },
168  { 0x203D, UCPO }, { 0x203E, UCPO }, { 0x203F, UCPC }, { 0x2040, UCPC },
169  { 0x2041, UCPO }, { 0x2042, UCPO }, { 0x2043, UCPO }, { 0x2045, UCPS },
170  { 0x2046, UCPE }, { 0x2047, UCPO }, { 0x2048, UCPO }, { 0x2049, UCPO },
171  { 0x204A, UCPO }, { 0x204B, UCPO }, { 0x204C, UCPO }, { 0x204D, UCPO },
172  { 0x204E, UCPO }, { 0x204F, UCPO }, { 0x2050, UCPO }, { 0x2051, UCPO },
173  { 0x2053, UCPO }, { 0x2054, UCPC }, { 0x2057, UCPO }, { 0x205F, UCZS },
174  { 0x207D, UCPS }, { 0x207E, UCPE }, { 0x208D, UCPS }, { 0x208E, UCPE },
175  { 0x2329, UCPS }, { 0x232A, UCPE }, { 0x23B4, UCPS }, { 0x23B5, UCPE },
176  { 0x23B6, UCPO }, { 0x2768, UCPS }, { 0x2769, UCPE }, { 0x276A, UCPS },
177  { 0x276B, UCPE }, { 0x276C, UCPS }, { 0x276D, UCPE }, { 0x276E, UCPS },
178  { 0x276F, UCPE }, { 0x2770, UCPS }, { 0x2771, UCPE }, { 0x2772, UCPS },
179  { 0x2773, UCPE }, { 0x2774, UCPS }, { 0x2775, UCPE }, { 0x27E6, UCPS },
180  { 0x27E7, UCPE }, { 0x27E8, UCPS }, { 0x27E9, UCPE }, { 0x27EA, UCPS },
181  { 0x27EB, UCPE }, { 0x2983, UCPS }, { 0x2984, UCPE }, { 0x2985, UCPS },
182  { 0x2986, UCPE }, { 0x2987, UCPS }, { 0x2988, UCPE }, { 0x2989, UCPS },
183  { 0x298A, UCPE }, { 0x298B, UCPS }, { 0x298C, UCPE }, { 0x298D, UCPS },
184  { 0x298E, UCPE }, { 0x298F, UCPS }, { 0x2990, UCPE }, { 0x2991, UCPS },
185  { 0x2992, UCPE }, { 0x2993, UCPS }, { 0x2994, UCPE }, { 0x2995, UCPS },
186  { 0x2996, UCPE }, { 0x2997, UCPS }, { 0x2998, UCPE }, { 0x29D8, UCPS },
187  { 0x29D9, UCPE }, { 0x29DA, UCPS }, { 0x29DB, UCPE }, { 0x29FC, UCPS },
188  { 0x29FD, UCPE }, { 0x3001, UCPO }, { 0x3002, UCPO }, { 0x3003, UCPO },
189  { 0x3008, UCPS }, { 0x3009, UCPE }, { 0x300A, UCPS }, { 0x300B, UCPE },
190  { 0x300C, UCPS }, { 0x300D, UCPE }, { 0x300E, UCPS }, { 0x300F, UCPE },
191  { 0x3010, UCPS }, { 0x3011, UCPE }, { 0x3014, UCPS }, { 0x3015, UCPE },
192  { 0x3016, UCPS }, { 0x3017, UCPE }, { 0x3018, UCPS }, { 0x3019, UCPE },
193  { 0x301A, UCPS }, { 0x301B, UCPE }, { 0x301C, UCPD }, { 0x301D, UCPS },
194  { 0x301E, UCPE }, { 0x301F, UCPE }, { 0x3030, UCPD }, { 0x303D, UCPO },
195  { 0x30A0, UCPD }, { 0x30FB, UCPC }, { 0xFD3E, UCPS }, { 0xFD3F, UCPE },
196  { 0xFE30, UCPO }, { 0xFE31, UCPD }, { 0xFE32, UCPD }, { 0xFE33, UCPC },
197  { 0xFE34, UCPC }, { 0xFE35, UCPS }, { 0xFE36, UCPE }, { 0xFE37, UCPS },
198  { 0xFE38, UCPE }, { 0xFE39, UCPS }, { 0xFE3A, UCPE }, { 0xFE3B, UCPS },
199  { 0xFE3C, UCPE }, { 0xFE3D, UCPS }, { 0xFE3E, UCPE }, { 0xFE3F, UCPS },
200  { 0xFE40, UCPE }, { 0xFE41, UCPS }, { 0xFE42, UCPE }, { 0xFE43, UCPS },
201  { 0xFE44, UCPE }, { 0xFE45, UCPO }, { 0xFE46, UCPO }, { 0xFE47, UCPS },
202  { 0xFE48, UCPE }, { 0xFE49, UCPO }, { 0xFE4A, UCPO }, { 0xFE4B, UCPO },
203  { 0xFE4C, UCPO }, { 0xFE4D, UCPC }, { 0xFE4E, UCPC }, { 0xFE4F, UCPC },
204  { 0xFE50, UCPO }, { 0xFE51, UCPO }, { 0xFE52, UCPO }, { 0xFE54, UCPO },
205  { 0xFE55, UCPO }, { 0xFE56, UCPO }, { 0xFE57, UCPO }, { 0xFE58, UCPD },
206  { 0xFE59, UCPS }, { 0xFE5A, UCPE }, { 0xFE5B, UCPS }, { 0xFE5C, UCPE },
207  { 0xFE5D, UCPS }, { 0xFE5E, UCPE }, { 0xFE5F, UCPO }, { 0xFE60, UCPO },
208  { 0xFE61, UCPO }, { 0xFE63, UCPD }, { 0xFE68, UCPO }, { 0xFE6A, UCPO },
209  { 0xFE6B, UCPO }, { 0xFF01, UCPO }, { 0xFF02, UCPO }, { 0xFF03, UCPO },
210  { 0xFF05, UCPO }, { 0xFF06, UCPO }, { 0xFF07, UCPO }, { 0xFF08, UCPS },
211  { 0xFF09, UCPE }, { 0xFF0A, UCPO }, { 0xFF0C, UCPO }, { 0xFF0D, UCPD },
212  { 0xFF0E, UCPO }, { 0xFF0F, UCPO }, { 0xFF1A, UCPO }, { 0xFF1B, UCPO },
213  { 0xFF1F, UCPO }, { 0xFF20, UCPO }, { 0xFF3B, UCPS }, { 0xFF3C, UCPO },
214  { 0xFF3D, UCPE }, { 0xFF3F, UCPC }, { 0xFF5B, UCPS }, { 0xFF5D, UCPE },
215  { 0xFF5F, UCPS }, { 0xFF60, UCPE }, { 0xFF61, UCPO }, { 0xFF62, UCPS },
216  { 0xFF63, UCPE }, { 0xFF64, UCPO }, { 0xFF65, UCPC }, { 0x10100,UCPO },
217  { 0x10101,UCPO }, { 0x1039F,UCPO },
218
219  /* final entry */
220  { 0x0000, UC00 }
221};
222
/* Where, relative to a character, a line may be broken. */
typedef enum
{
    NoWrapPoint = 0,  /* character does not allow a break      */
    WrapBefore,       /* break is allowed before the character */
    WrapAfter         /* break is allowed after the character  */
} WrapPoint;
229
230/*
231  If long lines of text have no white space as defined in HTML 4
232  (U+0009, U+000A, U+000D, U+000C, U+0020) other characters could
233  be used to determine a wrap point. Since user agents would
234  normalize the inserted newline character to a space character,
235  this wrapping behaviour would insert visual whitespace into the
236  document.
237
238  Characters of the General Category Pi and Ps in the Unicode
  character database (opening punctuation and initial quote
240  characters) mark a wrapping point before the character, other
241  punctuation characters (Pc, Pd, Pe, Pf, and Po), breakable
242  space characters (Zs), and paragraph and line separators
243  (Zl, Zp) mark a wrap point after the character. Using this
244  function Tidy can for example pretty print
245
246    <p>....................&ldquo;...quote...&rdquo;...</p>
247  as
248    <p>....................\n&ldquo;...quote...&rdquo;...</p>
249  or
250    <p>....................&ldquo;...quote...&rdquo;\n...</p>
251
252  if the next normal wrapping point would exceed the user
253  chosen wrapping column.
254*/
255static WrapPoint CharacterWrapPoint(tchar c)
256{
257    int i;
258    for (i = 0; unicode4cat[i].code && unicode4cat[i].code <= c; ++i)
259        if (unicode4cat[i].code == c)
260            /* wrapping before opening punctuation and initial quotes */
261            if (unicode4cat[i].category == UCPS ||
262                unicode4cat[i].category == UCPI)
263                return WrapBefore;
264            /* else wrapping after this character */
265            else
266                return WrapAfter;
267    /* character has no effect on line wrapping */
268    return NoWrapPoint;
269}
270
271static WrapPoint Big5WrapPoint(tchar c)
272{
273    if ((c & 0xFF00) == 0xA100)
274    {
275        /* opening brackets have odd codes: break before them */
276        if ( c > 0xA15C && c < 0xA1AD && (c & 1) == 1 )
277            return WrapBefore;
278        return WrapAfter;
279    }
280    return NoWrapPoint;
281}
282
283#endif /* SUPPORT_ASIAN_ENCODINGS */
284
285static void InitIndent( TidyIndent* ind )
286{
287    ind->spaces = -1;
288    ind->attrValStart = -1;
289    ind->attrStringStart = -1;
290}
291
292void TY_(InitPrintBuf)( TidyDocImpl* doc )
293{
294    ClearMemory( &doc->pprint, sizeof(TidyPrintImpl) );
295    InitIndent( &doc->pprint.indent[0] );
296    InitIndent( &doc->pprint.indent[1] );
297}
298
299void TY_(FreePrintBuf)( TidyDocImpl* doc )
300{
301    MemFree( doc->pprint.linebuf );
302    TY_(InitPrintBuf)( doc );
303}
304
305static void expand( TidyPrintImpl* pprint, uint len )
306{
307    uint* ip;
308    uint buflen = pprint->lbufsize;
309
310    if ( buflen == 0 )
311        buflen = 256;
312    while ( len >= buflen )
313        buflen *= 2;
314
315    ip = (uint*) MemRealloc( pprint->linebuf, buflen*sizeof(uint) );
316    if ( ip )
317    {
318      ClearMemory( ip+pprint->lbufsize,
319                   (buflen-pprint->lbufsize)*sizeof(uint) );
320      pprint->lbufsize = buflen;
321      pprint->linebuf = ip;
322    }
323}
324
325static uint GetSpaces( TidyPrintImpl* pprint )
326{
327    int spaces = pprint->indent[ 0 ].spaces;
328    return ( spaces < 0 ? 0U : (uint) spaces );
329}
330static int ClearInString( TidyPrintImpl* pprint )
331{
332    TidyIndent *ind = pprint->indent + pprint->ixInd;
333    return ind->attrStringStart = -1;
334}
335static int ToggleInString( TidyPrintImpl* pprint )
336{
337    TidyIndent *ind = pprint->indent + pprint->ixInd;
338    Bool inString = ( ind->attrStringStart >= 0 );
339    return ind->attrStringStart = ( inString ? -1 : (int) pprint->linelen );
340}
341static Bool IsInString( TidyPrintImpl* pprint )
342{
343    TidyIndent *ind = pprint->indent + 0; /* Always 1st */
344    return ( ind->attrStringStart >= 0 &&
345             ind->attrStringStart < (int) pprint->linelen );
346}
347static Bool IsWrapInString( TidyPrintImpl* pprint )
348{
349    TidyIndent *ind = pprint->indent + 0; /* Always 1st */
350    int wrap = (int) pprint->wraphere;
351    return ( ind->attrStringStart == 0 ||
352             (ind->attrStringStart > 0 && ind->attrStringStart < wrap) );
353}
354
355static Bool HasMixedContent (Node *element)
356{
357    Node * node;
358
359    if (!element)
360        return no;
361
362    for (node = element->content; node; node = node->next)
363        if ( TY_(nodeIsText)(node) )
364             return yes;
365
366    return no;
367}
368
369static void ClearInAttrVal( TidyPrintImpl* pprint )
370{
371    TidyIndent *ind = pprint->indent + pprint->ixInd;
372    ind->attrValStart = -1;
373}
374static int SetInAttrVal( TidyPrintImpl* pprint )
375{
376    TidyIndent *ind = pprint->indent + pprint->ixInd;
377    return ind->attrValStart = (int) pprint->linelen;
378}
379static Bool IsWrapInAttrVal( TidyPrintImpl* pprint )
380{
381    TidyIndent *ind = pprint->indent + 0; /* Always 1st */
382    int wrap = (int) pprint->wraphere;
383    return ( ind->attrValStart == 0 ||
384             (ind->attrValStart > 0 && ind->attrValStart < wrap) );
385}
386
387static Bool WantIndent( TidyDocImpl* doc )
388{
389    TidyPrintImpl* pprint = &doc->pprint;
390    Bool wantIt = GetSpaces(pprint) > 0;
391    if ( wantIt )
392    {
393        Bool indentAttrs = cfgBool( doc, TidyIndentAttributes );
394        wantIt = ( ( !IsWrapInAttrVal(pprint) || indentAttrs ) &&
395                   !IsWrapInString(pprint) );
396    }
397    return wantIt;
398}
399
400
401static uint  WrapOff( TidyDocImpl* doc )
402{
403    uint saveWrap = cfg( doc, TidyWrapLen );
404    TY_(SetOptionInt)( doc, TidyWrapLen, 0xFFFFFFFF );  /* very large number */
405    return saveWrap;
406}
407
408static void  WrapOn( TidyDocImpl* doc, uint saveWrap )
409{
410    TY_(SetOptionInt)( doc, TidyWrapLen, saveWrap );
411}
412
413static uint  WrapOffCond( TidyDocImpl* doc, Bool onoff )
414{
415    if ( onoff )
416        return WrapOff( doc );
417    return cfg( doc, TidyWrapLen );
418}
419
420
421static void AddC( TidyPrintImpl* pprint, uint c, uint string_index)
422{
423    if ( string_index + 1 >= pprint->lbufsize )
424        expand( pprint, string_index + 1 );
425    pprint->linebuf[string_index] = c;
426}
427
428static uint AddChar( TidyPrintImpl* pprint, uint c )
429{
430    AddC( pprint, c, pprint->linelen );
431    return ++pprint->linelen;
432}
433
434static uint AddAsciiString( TidyPrintImpl* pprint, ctmbstr str, uint string_index )
435{
436    uint ix, len = TY_(tmbstrlen)( str );
437    if ( string_index + len >= pprint->lbufsize )
438        expand( pprint, string_index + len );
439
440    for ( ix=0; ix<len; ++ix )
441        pprint->linebuf[string_index + ix] = str[ ix ];
442    return string_index + len;
443}
444
445static uint AddString( TidyPrintImpl* pprint, ctmbstr str )
446{
447   return pprint->linelen = AddAsciiString( pprint, str, pprint->linelen );
448}
449
450/* Saves current output point as the wrap point,
451** but only if indentation would NOT overflow
452** the current line.  Otherwise keep previous wrap point.
453*/
454static Bool SetWrap( TidyDocImpl* doc, uint indent )
455{
456    TidyPrintImpl* pprint = &doc->pprint;
457    Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) );
458    if ( wrap )
459    {
460        if ( pprint->indent[0].spaces < 0 )
461            pprint->indent[0].spaces = indent;
462        pprint->wraphere = pprint->linelen;
463    }
464    else if ( pprint->ixInd == 0 )
465    {
466        /* Save indent 1st time we pass the the wrap line */
467        pprint->indent[ 1 ].spaces = indent;
468        pprint->ixInd = 1;
469    }
470    return wrap;
471}
472
473static void CarryOver( int* valTo, int* valFrom, uint wrapPoint )
474{
475  if ( *valFrom > (int) wrapPoint )
476  {
477    *valTo = *valFrom - wrapPoint;
478    *valFrom = -1;
479  }
480}
481
482
483static Bool SetWrapAttr( TidyDocImpl* doc,
484                         uint indent, int attrStart, int strStart )
485{
486    TidyPrintImpl* pprint = &doc->pprint;
487    TidyIndent *ind = pprint->indent + 0;
488
489    Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) );
490    if ( wrap )
491    {
492        if ( ind[0].spaces < 0 )
493            ind[0].spaces = indent;
494        pprint->wraphere = pprint->linelen;
495    }
496    else if ( pprint->ixInd == 0 )
497    {
498        /* Save indent 1st time we pass the the wrap line */
499        pprint->indent[ 1 ].spaces = indent;
500        pprint->ixInd = 1;
501
502        /* Carry over string state */
503        CarryOver( &ind[1].attrStringStart, &ind[0].attrStringStart, pprint->wraphere );
504        CarryOver( &ind[1].attrValStart, &ind[0].attrValStart, pprint->wraphere );
505    }
506    ind += doc->pprint.ixInd;
507    ind->attrValStart = attrStart;
508    ind->attrStringStart = strStart;
509    return wrap;
510}
511
512
513/* Reset indent state after flushing a new line
514*/
515static void ResetLine( TidyPrintImpl* pprint )
516{
517    TidyIndent* ind = pprint->indent + 0;
518    if ( pprint->ixInd > 0 )
519    {
520        ind[0] = ind[1];
521        InitIndent( &ind[1] );
522    }
523
524    if ( pprint->wraphere > 0 )
525    {
526        int wrap = (int) pprint->wraphere;
527        if ( ind[0].attrStringStart > wrap )
528            ind[0].attrStringStart -= wrap;
529        if ( ind[0].attrValStart > wrap )
530            ind[0].attrValStart -= wrap;
531    }
532    else
533    {
534        if ( ind[0].attrStringStart > 0 )
535            ind[0].attrStringStart = 0;
536        if ( ind[0].attrValStart > 0 )
537            ind[0].attrValStart = 0;
538    }
539    pprint->wraphere = pprint->ixInd = 0;
540}
541
542/* Shift text after wrap point to
543** beginning of next line.
544*/
545static void ResetLineAfterWrap( TidyPrintImpl* pprint )
546{
547    if ( pprint->linelen > pprint->wraphere )
548    {
549        uint *p = pprint->linebuf;
550        uint *q = p + pprint->wraphere;
551        uint *end = p + pprint->linelen;
552
553        if ( ! IsWrapInAttrVal(pprint) )
554        {
555            while ( q < end && *q == ' ' )
556                ++q, ++pprint->wraphere;
557        }
558
559        while ( q < end )
560            *p++ = *q++;
561
562        pprint->linelen -= pprint->wraphere;
563    }
564    else
565    {
566        pprint->linelen = 0;
567    }
568
569    ResetLine( pprint );
570}
571
572/* Goes ahead with writing current line up to
573** previously saved wrap point.  Shifts unwritten
574** text in output buffer to beginning of next line.
575*/
576static void WrapLine( TidyDocImpl* doc )
577{
578    TidyPrintImpl* pprint = &doc->pprint;
579    uint i;
580
581    if ( pprint->wraphere == 0 )
582        return;
583
584    if ( WantIndent(doc) )
585    {
586        uint spaces = GetSpaces( pprint );
587        for ( i = 0; i < spaces; ++i )
588            TY_(WriteChar)( ' ', doc->docOut );
589    }
590
591    for ( i = 0; i < pprint->wraphere; ++i )
592        TY_(WriteChar)( pprint->linebuf[i], doc->docOut );
593
594    if ( IsWrapInString(pprint) )
595        TY_(WriteChar)( '\\', doc->docOut );
596
597    TY_(WriteChar)( '\n', doc->docOut );
598    ResetLineAfterWrap( pprint );
599}
600
601/* Checks current output line length along with current indent.
602** If combined they overflow output line length, go ahead
603** and flush output up to the current wrap point.
604*/
605static Bool CheckWrapLine( TidyDocImpl* doc )
606{
607    TidyPrintImpl* pprint = &doc->pprint;
608    if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) )
609    {
610        WrapLine( doc );
611        return yes;
612    }
613    return no;
614}
615
616static Bool CheckWrapIndent( TidyDocImpl* doc, uint indent )
617{
618    TidyPrintImpl* pprint = &doc->pprint;
619    if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) )
620    {
621        WrapLine( doc );
622        if ( pprint->indent[ 0 ].spaces < 0 )
623            pprint->indent[ 0 ].spaces = indent;
624        return yes;
625    }
626    return no;
627}
628
629static void WrapAttrVal( TidyDocImpl* doc )
630{
631    TidyPrintImpl* pprint = &doc->pprint;
632    uint i;
633
634    /* assert( IsWrapInAttrVal(pprint) ); */
635    if ( WantIndent(doc) )
636    {
637        uint spaces = GetSpaces( pprint );
638        for ( i = 0; i < spaces; ++i )
639            TY_(WriteChar)( ' ', doc->docOut );
640    }
641
642    for ( i = 0; i < pprint->wraphere; ++i )
643        TY_(WriteChar)( pprint->linebuf[i], doc->docOut );
644
645    if ( IsWrapInString(pprint) )
646        TY_(WriteChar)( '\\', doc->docOut );
647    else
648        TY_(WriteChar)( ' ', doc->docOut );
649
650    TY_(WriteChar)( '\n', doc->docOut );
651    ResetLineAfterWrap( pprint );
652}
653
654void TY_(PFlushLine)( TidyDocImpl* doc, uint indent )
655{
656    TidyPrintImpl* pprint = &doc->pprint;
657
658    if ( pprint->linelen > 0 )
659    {
660        uint i;
661
662        CheckWrapLine( doc );
663
664        if ( WantIndent(doc) )
665        {
666            uint spaces = GetSpaces( pprint );
667            for ( i = 0; i < spaces; ++i )
668                TY_(WriteChar)( ' ', doc->docOut );
669        }
670
671        for ( i = 0; i < pprint->linelen; ++i )
672            TY_(WriteChar)( pprint->linebuf[i], doc->docOut );
673
674        if ( IsInString(pprint) )
675            TY_(WriteChar)( '\\', doc->docOut );
676        ResetLine( pprint );
677        pprint->linelen = 0;
678    }
679
680    TY_(WriteChar)( '\n', doc->docOut );
681    pprint->indent[ 0 ].spaces = indent;
682}
683
684static void PCondFlushLine( TidyDocImpl* doc, uint indent )
685{
686    TidyPrintImpl* pprint = &doc->pprint;
687    if ( pprint->linelen > 0 )
688    {
689        uint i;
690
691        CheckWrapLine( doc );
692
693        if ( WantIndent(doc) )
694        {
695            uint spaces = GetSpaces( pprint );
696            for ( i = 0; i < spaces; ++i )
697                TY_(WriteChar)(' ', doc->docOut);
698        }
699
700        for ( i = 0; i < pprint->linelen; ++i )
701            TY_(WriteChar)( pprint->linebuf[i], doc->docOut );
702
703        if ( IsInString(pprint) )
704            TY_(WriteChar)( '\\', doc->docOut );
705        ResetLine( pprint );
706
707        TY_(WriteChar)( '\n', doc->docOut );
708        pprint->indent[ 0 ].spaces = indent;
709        pprint->linelen = 0;
710    }
711}
712
713static void PPrintChar( TidyDocImpl* doc, uint c, uint mode )
714{
715    tmbchar entity[128];
716    ctmbstr p;
717    TidyPrintImpl* pprint  = &doc->pprint;
718    uint outenc = cfg( doc, TidyOutCharEncoding );
719    Bool qmark = cfgBool( doc, TidyQuoteMarks );
720
721    if ( c == ' ' && !(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE | CDATA)))
722    {
723        /* coerce a space character to a non-breaking space */
724        if (mode & NOWRAP)
725        {
726            ctmbstr ent = "&nbsp;";
727            /* by default XML doesn't define &nbsp; */
728            if ( cfgBool(doc, TidyNumEntities) || cfgBool(doc, TidyXmlTags) )
729                ent = "&#160;";
730            AddString( pprint, ent );
731            return;
732        }
733        else
734            pprint->wraphere = pprint->linelen;
735    }
736
737    /* comment characters are passed raw */
738    if ( mode & (COMMENT | CDATA) )
739    {
740        AddChar( pprint, c );
741        return;
742    }
743
744    /* except in CDATA map < to &lt; etc. */
745    if ( !(mode & CDATA) )
746    {
747        if ( c == '<')
748        {
749            AddString( pprint, "&lt;" );
750            return;
751        }
752
753        if ( c == '>')
754        {
755            AddString( pprint, "&gt;" );
756            return;
757        }
758
759        /*
760          naked '&' chars can be left alone or
761          quoted as &amp; The latter is required
762          for XML where naked '&' are illegal.
763        */
764/* Apple Changes:
765   2007-02-07 iccir When output encoding is RAW, the lexer is forced
766                    to insert the entire entity instead of a converted
767                    single character.  Hence, &'s will appear in the
768                    stream and must not be converted to &amp;
769
770                    See fix for [4642206] in lexer.c
771*/
772#ifdef TIDY_APPLE_CHANGES
773        if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) && (cfg(doc, TidyOutCharEncoding) != RAW) )
774#else
775        if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) )
776#endif
777        {
778            AddString( pprint, "&amp;" );
779            return;
780        }
781
782        if ( c == '"' && qmark )
783        {
784            AddString( pprint, "&quot;" );
785            return;
786        }
787
788        if ( c == '\'' && qmark )
789        {
790            AddString( pprint, "&#39;" );
791            return;
792        }
793
794        if ( c == 160 && outenc != RAW )
795        {
796            if ( cfgBool(doc, TidyQuoteNbsp) )
797            {
798                if ( cfgBool(doc, TidyNumEntities) ||
799                     cfgBool(doc, TidyXmlTags) )
800                    AddString( pprint, "&#160;" );
801                else
802                    AddString( pprint, "&nbsp;" );
803            }
804            else
805                AddChar( pprint, c );
806            return;
807        }
808    }
809
810#if SUPPORT_ASIAN_ENCODINGS
811
812    /* #431953 - start RJ */
813    /* Handle encoding-specific issues */
814    switch ( outenc )
815    {
816    case UTF8:
817#if SUPPORT_UTF16_ENCODINGS
818    case UTF16:
819    case UTF16LE:
820    case UTF16BE:
821#endif
822        if (!(mode & PREFORMATTED) && cfg(doc, TidyPunctWrap))
823        {
824            WrapPoint wp = CharacterWrapPoint(c);
825            if (wp == WrapBefore)
826                pprint->wraphere = pprint->linelen;
827            else if (wp == WrapAfter)
828                pprint->wraphere = pprint->linelen + 1;
829        }
830        break;
831
832    case BIG5:
833        /* Allow linebreak at Chinese punctuation characters */
834        /* There are not many spaces in Chinese */
835        AddChar( pprint, c );
836        if (!(mode & PREFORMATTED)  && cfg(doc, TidyPunctWrap))
837        {
838            WrapPoint wp = Big5WrapPoint(c);
839            if (wp == WrapBefore)
840                pprint->wraphere = pprint->linelen;
841            else if (wp == WrapAfter)
842                pprint->wraphere = pprint->linelen + 1;
843        }
844        return;
845
846    case SHIFTJIS:
847#ifndef NO_NATIVE_ISO2022_SUPPORT
848    case ISO2022: /* ISO 2022 characters are passed raw */
849#endif
850    case RAW:
851        AddChar( pprint, c );
852        return;
853    }
854    /* #431953 - end RJ */
855
856#else /* SUPPORT_ASIAN_ENCODINGS */
857
858    /* otherwise ISO 2022 characters are passed raw */
859    if (
860#ifndef NO_NATIVE_ISO2022_SUPPORT
861        outenc == ISO2022 ||
862#endif
863        outenc == RAW )
864    {
865        AddChar( pprint, c );
866        return;
867    }
868
869#endif /* SUPPORT_ASIAN_ENCODINGS */
870
871    /* don't map latin-1 chars to entities */
872    if ( outenc == LATIN1 )
873    {
874        if (c > 255)  /* multi byte chars */
875        {
876            uint vers = TY_(HTMLVersion)( doc );
877            if ( !cfgBool(doc, TidyNumEntities) && (p = TY_(EntityName)(c, vers)) )
878                TY_(tmbsnprintf)(entity, sizeof(entity), "&%s;", p);
879            else
880                TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c);
881
882            AddString( pprint, entity );
883            return;
884        }
885
886        if (c > 126 && c < 160)
887        {
888            TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c);
889            AddString( pprint, entity );
890            return;
891        }
892
893        AddChar( pprint, c );
894        return;
895    }
896
897    /* don't map UTF-8 chars to entities */
898    if ( outenc == UTF8 )
899    {
900        AddChar( pprint, c );
901        return;
902    }
903
904#if SUPPORT_UTF16_ENCODINGS
905    /* don't map UTF-16 chars to entities */
906    if ( outenc == UTF16 || outenc == UTF16LE || outenc == UTF16BE )
907    {
908        AddChar( pprint, c );
909        return;
910    }
911#endif
912
913    /* use numeric entities only  for XML */
914    if ( cfgBool(doc, TidyXmlTags) )
915    {
916        /* if ASCII use numeric entities for chars > 127 */
917        if ( c > 127 && outenc == ASCII )
918        {
919            TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c);
920            AddString( pprint, entity );
921            return;
922        }
923
924        /* otherwise output char raw */
925        AddChar( pprint, c );
926        return;
927    }
928
929    /* default treatment for ASCII */
930    if ( outenc == ASCII && (c > 126 || (c < ' ' && c != '\t')) )
931    {
932        uint vers = TY_(HTMLVersion)( doc );
933        if (!cfgBool(doc, TidyNumEntities) && (p = TY_(EntityName)(c, vers)) )
934            TY_(tmbsnprintf)(entity, sizeof(entity), "&%s;", p);
935        else
936            TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c);
937
938        AddString( pprint, entity );
939        return;
940    }
941
942    AddChar( pprint, c );
943}
944
945static uint IncrWS( uint start, uint end, uint indent, int ixWS )
946{
947  if ( ixWS > 0 )
948  {
949    uint st = start + MIN( (uint)ixWS, indent );
950    start = MIN( st, end );
951  }
952  return start;
953}
954/*
955  The line buffer is uint not char so we can
956  hold Unicode values unencoded. The translation
957  to UTF-8 is deferred to the TY_(WriteChar)() routine called
958  to flush the line buffer.
959*/
960static void PPrintText( TidyDocImpl* doc, uint mode, uint indent,
961                        Node* node  )
962{
963    uint start = node->start;
964    uint end = node->end;
965    uint ix, c = 0;
966    int  ixNL = TextEndsWithNewline( doc->lexer, node, mode );
967    int  ixWS = TextStartsWithWhitespace( doc->lexer, node, start, mode );
968    if ( ixNL > 0 )
969      end -= ixNL;
970    start = IncrWS( start, end, indent, ixWS );
971
972    for ( ix = start; ix < end; ++ix )
973    {
974        CheckWrapIndent( doc, indent );
975        /*
976        if ( CheckWrapIndent(doc, indent) )
977        {
978            ixWS = TextStartsWithWhitespace( doc->lexer, node, ix );
979            ix = IncrWS( ix, end, indent, ixWS );
980        }
981        */
982        c = (byte) doc->lexer->lexbuf[ix];
983
984        /* look for UTF-8 multibyte character */
985        if ( c > 0x7F )
986             ix += TY_(GetUTF8)( doc->lexer->lexbuf + ix, &c );
987
988        if ( c == '\n' )
989        {
990            TY_(PFlushLine)( doc, indent );
991            ixWS = TextStartsWithWhitespace( doc->lexer, node, ix+1, mode );
992            ix = IncrWS( ix, end, indent, ixWS );
993        }
994        else
995        {
996            PPrintChar( doc, c, mode );
997        }
998    }
999}
1000
1001#if 0
1002static void PPrintString( TidyDocImpl* doc, uint indent, ctmbstr str )
1003{
1004    while ( *str != '\0' )
1005        AddChar( &doc->pprint, *str++ );
1006}
1007#endif /* 0 */
1008
1009
/*
  Print "=" followed by the delimited attribute value.

  value      - attribute text; when NULL only "=" and an empty pair of
               delimiters are written
  delim      - quote character to use; 0 selects '"'
  wrappable  - when yes the value is printed in NORMAL mode and may be
               wrapped at spaces, otherwise PREFORMATTED mode is used
  scriptAttr - when yes (and TidyWrapScriptlets is set) quote characters
               other than the delimiter toggle the printer's in-string
               state
*/
static void PPrintAttrValue( TidyDocImpl* doc, uint indent,
                             ctmbstr value, uint delim, Bool wrappable, Bool scriptAttr )
{
    TidyPrintImpl* pprint = &doc->pprint;
    Bool scriptlets = cfgBool(doc, TidyWrapScriptlets);

    uint mode = PREFORMATTED | ATTRIBVALUE;
    if ( wrappable )
        mode = NORMAL | ATTRIBVALUE;

    /* look for ASP, Tango or PHP instructions for computed attribute value */
    if ( value && value[0] == '<' )
    {
        /* values starting with "<%", "<@" or "<?php" are printed with CDATA set */
        if ( value[1] == '%' || value[1] == '@'||
             TY_(tmbstrncmp)(value, "<?php", 5) == 0 )
            mode |= CDATA;
    }

    /* fall back to a double quote when no delimiter was recorded */
    if ( delim == 0 )
        delim = '"';

    AddChar( pprint, '=' );

    /* don't wrap after "=" for xml documents */
    if ( !cfgBool(doc, TidyXmlOut) || cfgBool(doc, TidyXhtmlOut) )
    {
        SetWrap( doc, indent );
        CheckWrapIndent( doc, indent );
        /*
        if ( !SetWrap(doc, indent) )
            PCondFlushLine( doc, indent );
        */
    }

    AddChar( pprint, delim );

    if ( value )
    {
        uint wraplen = cfg( doc, TidyWrapLen );
        /* NOTE(review): attrStart/strStart are presumably buffer positions
           returned by the state helpers, negative when not set - confirm */
        int attrStart = SetInAttrVal( pprint );
        int strStart = ClearInString( pprint );

        while (*value != '\0')
        {
            uint c = *value;

            /* a space inside a wrappable value is a candidate wrap point */
            if ( wrappable && c == ' ' )
                SetWrapAttr( doc, indent, attrStart, strStart );

            /* wrap once the line (including indent) reaches the wrap length */
            if ( wrappable && pprint->wraphere > 0 &&
                 GetSpaces(pprint) + pprint->linelen >= wraplen )
                WrapAttrVal( doc );

            /* the delimiter itself must always be escaped inside the value */
            if ( c == delim )
            {
                ctmbstr entity = (c == '"' ? "&quot;" : "&#39;");
                AddString( pprint, entity );
                ++value;
                continue;
            }
            else if (c == '"')
            {
                /* non-delimiter double quote: escaped only if TidyQuoteMarks */
                if ( cfgBool(doc, TidyQuoteMarks) )
                    AddString( pprint, "&quot;" );
                else
                    AddChar( pprint, c );

                if ( delim == '\'' && scriptAttr && scriptlets )
                    strStart = ToggleInString( pprint );

                ++value;
                continue;
            }
            else if ( c == '\'' )
            {
                /* non-delimiter single quote: escaped only if TidyQuoteMarks */
                if ( cfgBool(doc, TidyQuoteMarks) )
                    AddString( pprint, "&#39;" );
                else
                    AddChar( pprint, c );

                if ( delim == '"' && scriptAttr && scriptlets )
                    strStart = ToggleInString( pprint );

                ++value;
                continue;
            }

            /* look for UTF-8 multibyte character */
            if ( c > 0x7F )
                 value += TY_(GetUTF8)( value, &c );
            ++value;

            if ( c == '\n' )
            {
                /* No indent inside Javascript literals */
                TY_(PFlushLine)( doc, (strStart < 0
                                       && !cfgBool(doc, TidyLiteralAttribs) ?
                                       indent : 0) );
                continue;
            }
            PPrintChar( doc, c, mode );
        }
        /* leave attribute-value / string state before the closing delimiter */
        ClearInAttrVal( pprint );
        ClearInString( pprint );
    }
    AddChar( pprint, delim );
}
1117
1118static uint AttrIndent( TidyDocImpl* doc, Node* node, AttVal* ARG_UNUSED(attr) )
1119{
1120  uint spaces = cfg( doc, TidyIndentSpaces );
1121  uint xtra = 2;  /* 1 for the '<', another for the ' ' */
1122  if ( node->element == NULL )
1123    return spaces;
1124
1125  if ( !TY_(nodeHasCM)(node, CM_INLINE) ||
1126       !ShouldIndent(doc, node->parent ? node->parent: node) )
1127    return xtra + TY_(tmbstrlen)( node->element );
1128
1129  if ( NULL != (node = TY_(FindContainer)(node)) )
1130    return xtra + TY_(tmbstrlen)( node->element );
1131  return spaces;
1132}
1133
1134static Bool AttrNoIndentFirst( /*TidyDocImpl* doc,*/ Node* node, AttVal* attr )
1135{
1136  return ( attr==node->attributes );
1137
1138  /*&&
1139           ( InsideHead(doc, node) ||
1140             !TY_(nodeHasCM)(node, CM_INLINE) ) );
1141             */
1142}
1143
/*
  Print a single attribute of `node`: a separating space or line break,
  the attribute name (upper-cased for ASCII when TidyUpperCaseAttrs is
  set) and, where applicable, its value via PPrintAttrValue().

  indent - current indent; increased by AttrIndent() for attributes
           other than the first when TidyIndentAttributes is enabled
*/
static void PPrintAttribute( TidyDocImpl* doc, uint indent,
                             Node *node, AttVal *attr )
{
    TidyPrintImpl* pprint = &doc->pprint;
    Bool xmlOut    = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut  = cfgBool( doc, TidyXhtmlOut );
    Bool wrapAttrs = cfgBool( doc, TidyWrapAttVals );
    Bool ucAttrs   = cfgBool( doc, TidyUpperCaseAttrs );
    Bool indAttrs  = cfgBool( doc, TidyIndentAttributes );
    uint xtra      = AttrIndent( doc, node, attr );
    Bool first     = AttrNoIndentFirst( /*doc,*/ node, attr );
    tmbstr name    = attr->attribute;
    Bool wrappable = no;
    tchar c;

    /* fix for odd attribute indentation bug triggered by long values */
    if (!indAttrs)
      xtra = 0;

    if ( indAttrs )
    {
        /* every attribute but the first goes on its own, further indented line */
        if ( TY_(nodeIsElement)(node) && !first )
        {
            indent += xtra;
            PCondFlushLine( doc, indent );
        }
        else
          indAttrs = no;
    }

    CheckWrapIndent( doc, indent );

    /* values are only wrappable for plain HTML output of known attributes */
    if ( !xmlOut && !xhtmlOut && attr->dict )
    {
        if ( TY_(IsScript)(doc, name) )
            wrappable = cfgBool( doc, TidyWrapScriptlets );
        else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs )
            wrappable = yes;
    }

    /* separate from what precedes: new line if no wrap point could be
       set, otherwise a single space (unless the line is still empty) */
    if ( !first && !SetWrap(doc, indent) )
    {
        TY_(PFlushLine)( doc, indent+xtra );  /* Put it on next line */
    }
    else if ( pprint->linelen > 0 )
    {
        AddChar( pprint, ' ' );
    }

    /* Attribute name */
    while (*name)
    {
        c = (unsigned char)*name;

        /* multibyte characters are decoded; only ASCII is case-mapped */
        if (c > 0x7F)
            name += TY_(GetUTF8)(name, &c);
        else if (ucAttrs)
            c = TY_(ToUpper)(c);

        AddChar(pprint, c);
        ++name;
    }

/* fix for bug 732038 */
#if 0
    /* If not indenting attributes, bump up indent for
    ** value after putting out name.
    */
    if ( !indAttrs )
        indent += xtra;
#endif

    CheckWrapIndent( doc, indent );

    if ( attr->value == NULL )
    {
        Bool isB = TY_(IsBoolAttribute)(attr);
        Bool scriptAttr = TY_(attrIsEvent)(attr);

        /* XML output: boolean attributes get their own name as the value,
           anything else is passed NULLSTR */
        if ( xmlOut )
            PPrintAttrValue( doc, indent, isB ? attr->attribute : NULLSTR,
                             attr->delim, no, scriptAttr );

        /* non-boolean attribute on a node for which IsNewNode() is false:
           print an explicit empty value */
        else if ( !isB && !TY_(IsNewNode)(node) )
            PPrintAttrValue( doc, indent, "", attr->delim, yes, scriptAttr );

        /* otherwise the bare name is enough; just mark a wrap point */
        else
            SetWrap( doc, indent );
    }
    else
        PPrintAttrValue( doc, indent, attr->value, attr->delim, wrappable, no );
}
1236
1237static void PPrintAttrs( TidyDocImpl* doc, uint indent, Node *node )
1238{
1239    TidyPrintImpl* pprint = &doc->pprint;
1240    AttVal* av;
1241
1242    /* add xml:space attribute to pre and other elements */
1243    if ( cfgBool(doc, TidyXmlOut) && cfgBool(doc, TidyXmlSpace) &&
1244         !TY_(GetAttrByName)(node, "xml:space") &&
1245         TY_(XMLPreserveWhiteSpace)(doc, node) )
1246    {
1247        TY_(AddAttribute)( doc, node, "xml:space", "preserve" );
1248    }
1249
1250    for ( av = node->attributes; av; av = av->next )
1251    {
1252        if ( av->attribute != NULL )
1253        {
1254            PPrintAttribute( doc, indent, node, av );
1255        }
1256        else if ( av->asp != NULL )
1257        {
1258            AddChar( pprint, ' ' );
1259            PPrintAsp( doc, indent, av->asp );
1260        }
1261        else if ( av->php != NULL )
1262        {
1263            AddChar( pprint, ' ' );
1264            PPrintPhp( doc, indent, av->php );
1265        }
1266    }
1267}
1268
1269/*
1270 Line can be wrapped immediately after inline start tag provided
1271 if follows a text node ending in a space, or it follows a <br>,
1272 or its parent is an inline element that that rule applies to.
1273 This behaviour was reverse engineered from Netscape 3.0.
1274
1275 Line wrapping can occur if an element is not empty and before a block
1276 level. For instance:
1277 <p><span>
1278 x</span>y</p>
1279 will display properly. Whereas
1280 <p><img />
1281 x<</p> won't.
1282*/
1283static Bool AfterSpaceImp(Lexer *lexer, Node *node, Bool isEmpty)
1284{
1285    Node *prev;
1286
1287    if ( !TY_(nodeCMIsInline)(node) )
1288        return yes;
1289
1290    prev = node->prev;
1291    if (prev)
1292    {
1293        if (TY_(nodeIsText)(prev) && prev->end > prev->start)
1294        {
1295            uint i, c = '\0'; /* initialised to avoid warnings */
1296            for (i = prev->start; i < prev->end; ++i)
1297            {
1298                c = (byte) lexer->lexbuf[i];
1299                if ( c > 0x7F )
1300                    i += TY_(GetUTF8)( lexer->lexbuf + i, &c );
1301            }
1302
1303            if ( c == ' ' || c == '\n' )
1304                return yes;
1305        }
1306        else if (nodeIsBR(prev))
1307            return yes;
1308
1309        return no;
1310    }
1311
1312    if ( isEmpty && !TY_(nodeCMIsInline)(node->parent) )
1313        return no;
1314
1315    return AfterSpaceImp(lexer, node->parent, isEmpty);
1316}
1317
1318static Bool AfterSpace(Lexer *lexer, Node *node)
1319{
1320    return AfterSpaceImp(lexer, node, TY_(nodeCMIsEmpty)(node));
1321}
1322
/*
  Print a start tag (or an end tag when node->type is EndTag) together
  with its attributes, then decide whether the line may be wrapped or
  flushed right after it.

  mode   - printing mode flags; PREFORMATTED suppresses the wrap/flush
           handling after the tag, NOWRAP prevents setting a wrap point
  indent - indent used for attribute printing and line flushing
*/
static void PPrintTag( TidyDocImpl* doc,
                       uint mode, uint indent, Node *node )
{
    TidyPrintImpl* pprint = &doc->pprint;
    Bool uc = cfgBool( doc, TidyUpperCaseTags );
    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
    Bool xmlOut = cfgBool( doc, TidyXmlOut );
    tchar c;
    tmbstr s = node->element;

    AddChar( pprint, '<' );

    if ( node->type == EndTag )
        AddChar( pprint, '/' );

    /* element name: multibyte characters are decoded, ASCII is
       upper-cased when TidyUpperCaseTags is set */
    if (s)
    {
        while (*s)
        {
            c = (unsigned char)*s;

            if (c > 0x7F)
                s += TY_(GetUTF8)(s, &c);
            else if (uc)
                c = TY_(ToUpper)(c);

            AddChar(pprint, c);
            ++s;
        }
    }

    PPrintAttrs( doc, indent, node );

    /* XML/XHTML output: self-close empty elements with " /" */
    if ( (xmlOut || xhtmlOut) &&
         (node->type == StartEndTag || TY_(nodeCMIsEmpty)(node)) )
    {
        AddChar( pprint, ' ' );   /* Space is NS compatibility hack <br /> */
        AddChar( pprint, '/' );   /* Required end tag marker */
    }

    AddChar( pprint, '>' );

    if ( (node->type != StartEndTag || xhtmlOut) && !(mode & PREFORMATTED) )
    {
        uint wraplen = cfg( doc, TidyWrapLen );
        CheckWrapIndent( doc, indent );

        /* still room on the line: at most record a wrap point */
        if ( indent + pprint->linelen < wraplen )
        {
            /* wrap after start tag if is <br/> or if it's not inline.
               Technically, it would be safe to call only AfterSpace.
               However, it would disrupt the existing algorithm. So let's
               leave as is. Note that AfterSpace returns true for non inline
               elements but can still be false for some <br>. So it has to
               stay as well. */
            if (!(mode & NOWRAP) && (!TY_(nodeCMIsInline)(node) || nodeIsBR(node))
                && AfterSpace(doc->lexer, node))
            {
                pprint->wraphere = pprint->linelen;
            }
        }
        /* flush the current buffer only if it is known to be safe,
           i.e. it will not introduce some spurious white spaces.
           See bug #996484 */
        else if ( mode & NOWRAP ||
                  nodeIsBR(node) || AfterSpace(doc->lexer, node))
            PCondFlushLine( doc, indent );
    }
}
1392
1393static void PPrintEndTag( TidyDocImpl* doc, uint ARG_UNUSED(mode),
1394                          uint ARG_UNUSED(indent), Node *node )
1395{
1396    TidyPrintImpl* pprint = &doc->pprint;
1397    Bool uc = cfgBool( doc, TidyUpperCaseTags );
1398    tmbstr s = node->element;
1399    tchar c;
1400
1401   /*
1402     Netscape ignores SGML standard by not ignoring a
1403     line break before </A> or </U> etc. To avoid rendering
1404     this as an underlined space, I disable line wrapping
1405     before inline end tags by the #if 0 ... #endif
1406   */
1407#if 0
1408    if ( !(mode & NOWRAP) )
1409        SetWrap( doc, indent );
1410#endif
1411
1412    AddString( pprint, "</" );
1413
1414    if (s)
1415    {
1416        while (*s)
1417        {
1418             c = (unsigned char)*s;
1419
1420             if (c > 0x7F)
1421                 s += TY_(GetUTF8)(s, &c);
1422             else if (uc)
1423                 c = TY_(ToUpper)(c);
1424
1425             AddChar(pprint, c);
1426             ++s;
1427        }
1428    }
1429
1430    AddChar( pprint, '>' );
1431}
1432
1433static void PPrintComment( TidyDocImpl* doc, uint indent, Node* node )
1434{
1435    TidyPrintImpl* pprint = &doc->pprint;
1436
1437    SetWrap( doc, indent );
1438    AddString( pprint, "<!--" );
1439
1440#if 0
1441    SetWrap( doc, indent );
1442#endif
1443
1444    PPrintText(doc, COMMENT, 0, node);
1445
1446#if 0
1447    SetWrap( doc, indent );
1448    AddString( pprint, "--" );
1449#endif
1450
1451    AddString(pprint, "--");
1452    AddChar( pprint, '>' );
1453    if ( node->linebreak && node->next )
1454        TY_(PFlushLine)( doc, indent );
1455}
1456
/*
  Print a DOCTYPE declaration: "<!DOCTYPE name", the optional PUBLIC
  and SYSTEM identifiers (taken from the node's "PUBLIC" / "SYSTEM"
  attributes) and an optional internal subset in square brackets.
*/
static void PPrintDocType( TidyDocImpl* doc, uint indent, Node *node )
{
    TidyPrintImpl* pprint = &doc->pprint;
    uint wraplen = cfg( doc, TidyWrapLen );
    uint spaces = cfg( doc, TidyIndentSpaces );
    AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
    AttVal* sys = TY_(GetAttrByName)(node, "SYSTEM");

    /* todo: handle non-ASCII characters in FPI / SI / node->element */

    SetWrap( doc, indent );
    PCondFlushLine( doc, indent );

    AddString( pprint, "<!DOCTYPE " );
    SetWrap( doc, indent );
    if (node->element)
    {
        AddString(pprint, node->element);
    }

    if (fpi && fpi->value)
    {
        AddString(pprint, " PUBLIC ");
        AddChar(pprint, fpi->delim);
        AddString(pprint, fpi->value);
        AddChar(pprint, fpi->delim);
    }

    /* both identifiers present: the system identifier goes on a
       continuation line (no SYSTEM keyword is written in this case) */
    if (fpi && fpi->value && sys && sys->value)
    {
        /* candidate indent for the continuation line; the arithmetic is
           unsigned, so a line shorter than the quoted system identifier
           wraps to a huge value which the range test below resets to 0 */
        uint i = pprint->linelen - (TY_(tmbstrlen)(sys->value) + 2) - 1;
        if (!(i>0&&TY_(tmbstrlen)(sys->value)+2+i<wraplen&&i<=(spaces?spaces:2)*2))
            i = 0;

        PCondFlushLine(doc, i);
        /* if nothing was flushed, separate the identifiers with a space */
        if (pprint->linelen)
            AddChar(pprint, ' ');
    }
    else if (sys && sys->value)
    {
        AddString(pprint, " SYSTEM ");
    }

    if (sys && sys->value)
    {
        AddChar(pprint, sys->delim);
        AddString(pprint, sys->value);
        AddChar(pprint, sys->delim);
    }

    /* internal subset, printed in CDATA mode with zero indent */
    if (node->content)
    {
        PCondFlushLine(doc, indent);
        AddChar(pprint, '[');
        PPrintText(doc, CDATA, 0, node->content);
        AddChar(pprint, ']');
    }

    SetWrap( doc, 0 );
    AddChar( pprint, '>' );
    PCondFlushLine( doc, indent );
}
1519
1520static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node )
1521{
1522    TidyPrintImpl* pprint = &doc->pprint;
1523    tchar c;
1524    tmbstr s;
1525
1526    SetWrap( doc, indent );
1527    AddString( pprint, "<?" );
1528
1529    s = node->element;
1530
1531    while (s && *s)
1532    {
1533        c = (unsigned char)*s;
1534        if (c > 0x7F)
1535            s += TY_(GetUTF8)(s, &c);
1536        AddChar(pprint, c);
1537        ++s;
1538    }
1539
1540    /* set CDATA to pass < and > unescaped */
1541    PPrintText( doc, CDATA, indent, node );
1542
1543    if (cfgBool(doc, TidyXmlOut) ||
1544        cfgBool(doc, TidyXhtmlOut) || node->closed)
1545        AddChar( pprint, '?' );
1546
1547    AddChar( pprint, '>' );
1548    PCondFlushLine( doc, indent );
1549}
1550
1551static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node )
1552{
1553    AttVal* att;
1554    uint saveWrap;
1555    TidyPrintImpl* pprint = &doc->pprint;
1556    Bool ucAttrs;
1557    SetWrap( doc, indent );
1558    saveWrap = WrapOff( doc );
1559
1560    /* no case translation for XML declaration pseudo attributes */
1561    ucAttrs = cfgBool(doc, TidyUpperCaseAttrs);
1562    TY_(SetOptionBool)(doc, TidyUpperCaseAttrs, no);
1563
1564    AddString( pprint, "<?xml" );
1565
1566    /* Force order of XML declaration attributes */
1567    /* PPrintAttrs( doc, indent, node ); */
1568    if ( NULL != (att = TY_(AttrGetById)(node, TidyAttr_VERSION)) )
1569      PPrintAttribute( doc, indent, node, att );
1570    if ( NULL != (att = TY_(AttrGetById)(node, TidyAttr_ENCODING)) )
1571      PPrintAttribute( doc, indent, node, att );
1572    if ( NULL != (att = TY_(GetAttrByName)(node, "standalone")) )
1573      PPrintAttribute( doc, indent, node, att );
1574
1575    /* restore old config value */
1576    TY_(SetOptionBool)(doc, TidyUpperCaseAttrs, ucAttrs);
1577
1578    if ( node->end <= 0 || doc->lexer->lexbuf[node->end - 1] != '?' )
1579        AddChar( pprint, '?' );
1580    AddChar( pprint, '>' );
1581    WrapOn( doc, saveWrap );
1582    TY_(PFlushLine)( doc, indent );
1583}
1584
1585/* note ASP and JSTE share <% ... %> syntax */
1586static void PPrintAsp( TidyDocImpl* doc, uint indent, Node *node )
1587{
1588    TidyPrintImpl* pprint = &doc->pprint;
1589    Bool wrapAsp  = cfgBool( doc, TidyWrapAsp );
1590    Bool wrapJste = cfgBool( doc, TidyWrapJste );
1591    uint saveWrap = WrapOffCond( doc, !wrapAsp || !wrapJste );
1592
1593#if 0
1594    SetWrap( doc, indent );
1595#endif
1596    AddString( pprint, "<%" );
1597    PPrintText( doc, (wrapAsp ? CDATA : COMMENT), indent, node );
1598    AddString( pprint, "%>" );
1599
1600    /* PCondFlushLine( doc, indent ); */
1601    WrapOn( doc, saveWrap );
1602}
1603
1604/* JSTE also supports <# ... #> syntax */
1605static void PPrintJste( TidyDocImpl* doc, uint indent, Node *node )
1606{
1607    TidyPrintImpl* pprint = &doc->pprint;
1608    Bool wrapAsp = cfgBool( doc, TidyWrapAsp );
1609    uint saveWrap = WrapOffCond( doc, !wrapAsp  );
1610
1611    AddString( pprint, "<#" );
1612    PPrintText( doc, (cfgBool(doc, TidyWrapJste) ? CDATA : COMMENT),
1613                indent, node );
1614    AddString( pprint, "#>" );
1615
1616    /* PCondFlushLine( doc, indent ); */
1617    WrapOn( doc, saveWrap );
1618}
1619
1620/* PHP is based on XML processing instructions */
1621static void PPrintPhp( TidyDocImpl* doc, uint indent, Node *node )
1622{
1623    TidyPrintImpl* pprint = &doc->pprint;
1624    Bool wrapPhp = cfgBool( doc, TidyWrapPhp );
1625    uint saveWrap = WrapOffCond( doc, !wrapPhp  );
1626#if 0
1627    SetWrap( doc, indent );
1628#endif
1629
1630    AddString( pprint, "<?" );
1631    PPrintText( doc, (wrapPhp ? CDATA : COMMENT),
1632                indent, node );
1633    AddString( pprint, "?>" );
1634
1635    /* PCondFlushLine( doc, indent ); */
1636    WrapOn( doc, saveWrap );
1637}
1638
1639static void PPrintCDATA( TidyDocImpl* doc, uint indent, Node *node )
1640{
1641    uint saveWrap;
1642    TidyPrintImpl* pprint = &doc->pprint;
1643    Bool indentCData = cfgBool( doc, TidyIndentCdata );
1644    if ( !indentCData )
1645        indent = 0;
1646
1647    PCondFlushLine( doc, indent );
1648    saveWrap = WrapOff( doc );        /* disable wrapping */
1649
1650    AddString( pprint, "<![CDATA[" );
1651    PPrintText( doc, COMMENT, indent, node );
1652    AddString( pprint, "]]>" );
1653
1654    PCondFlushLine( doc, indent );
1655    WrapOn( doc, saveWrap );          /* restore wrapping */
1656}
1657
1658static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node )
1659{
1660    TidyPrintImpl* pprint = &doc->pprint;
1661    Bool wrapSect = cfgBool( doc, TidyWrapSection );
1662    uint saveWrap = WrapOffCond( doc, !wrapSect  );
1663#if 0
1664    SetWrap( doc, indent );
1665#endif
1666
1667    AddString( pprint, "<![" );
1668    PPrintText( doc, (wrapSect ? CDATA : COMMENT),
1669                indent, node );
1670    AddString( pprint, "]>" );
1671
1672    /* PCondFlushLine( doc, indent ); */
1673    WrapOn( doc, saveWrap );
1674}
1675
1676
1677#if 0
1678/*
1679** Print script and style elements. For XHTML, wrap the content as follows:
1680**
1681**     JavaScript:
1682**         //<![CDATA[
1683**             content
1684**         //]]>
1685**     VBScript:
1686**         '<![CDATA[
1687**             content
1688**         ']]>
1689**     CSS:
1690**         / *<![CDATA[* /     Extra spaces to keep compiler happy
1691**             content
1692**         / *]]>* /
1693**     other:
1694**         <![CDATA[
1695**             content
1696**         ]]>
1697*/
1698#endif
1699
/* Markers used by PPrintScriptStyle() when wrapping script/style content
** in a CDATA section for XHTML output.  The CDATA start/end markers are
** written between a language-specific comment start and comment end so
** the markers are hidden from the script or style language itself:
** "//" for JavaScript, "'" for VBScript, a C-style comment pair for CSS,
** and nothing for any other type.
*/
static ctmbstr CDATA_START           = "<![CDATA[";
static ctmbstr CDATA_END             = "]]>";
static ctmbstr JS_COMMENT_START      = "//";
static ctmbstr JS_COMMENT_END        = "";
static ctmbstr VB_COMMENT_START      = "\'";
static ctmbstr VB_COMMENT_END        = "";
static ctmbstr CSS_COMMENT_START     = "/*";
static ctmbstr CSS_COMMENT_END       = "*/";
static ctmbstr DEFAULT_COMMENT_START = "";
static ctmbstr DEFAULT_COMMENT_END   = "";
1710
1711static Bool InsideHead( TidyDocImpl* doc, Node *node )
1712{
1713  if ( nodeIsHEAD(node) )
1714    return yes;
1715
1716  if ( node->parent != NULL )
1717    return InsideHead( doc, node->parent );
1718
1719  return no;
1720}
1721
1722/* Is text node and already ends w/ a newline?
1723
1724   Used to pretty print CDATA/PRE text content.
1725   If it already ends on a newline, it is not
1726   necessary to print another before printing end tag.
1727*/
1728static int TextEndsWithNewline(Lexer *lexer, Node *node, uint mode )
1729{
1730    if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start )
1731    {
1732        uint ch, ix = node->end - 1;
1733        /* Skip non-newline whitespace. */
1734        while ( ix >= node->start && (ch = (lexer->lexbuf[ix] & 0xff))
1735                && ( ch == ' ' || ch == '\t' || ch == '\r' ) )
1736            --ix;
1737
1738        if ( lexer->lexbuf[ ix ] == '\n' )
1739          return node->end - ix - 1; /* #543262 tidy eats all memory */
1740    }
1741    return -1;
1742}
1743
1744static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode )
1745{
1746    assert( node != NULL );
1747    if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start && start >= node->start )
1748    {
1749        uint ch, ix = start;
1750        /* Skip whitespace. */
1751        while ( ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff))
1752                && ( ch==' ' || ch=='\t' || ch=='\r' ) )
1753            ++ix;
1754
1755        if ( ix > start )
1756          return ix - start;
1757    }
1758    return -1;
1759}
1760
1761static Bool HasCDATA( Lexer* lexer, Node* node )
1762{
1763    /* Scan forward through the textarray. Since the characters we're
1764    ** looking for are < 0x7f, we don't have to do any UTF-8 decoding.
1765    */
1766    ctmbstr start = lexer->lexbuf + node->start;
1767    int len = node->end - node->start + 1;
1768
1769    if ( node->type != TextNode )
1770        return no;
1771
1772    return ( NULL != TY_(tmbsubstrn)( start, len, CDATA_START ));
1773}
1774
1775
/*
  Print a script or style element: start tag, content and end tag.
  For XHTML output, content that does not already contain a CDATA start
  marker is wrapped in a CDATA section whose markers are placed inside
  comments appropriate for the element's "type" attribute.
*/
static
void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node )
{
    TidyPrintImpl* pprint = &doc->pprint;
    Node*   content;
    ctmbstr commentStart = DEFAULT_COMMENT_START;
    ctmbstr commentEnd = DEFAULT_COMMENT_END;
    Bool    hasCData = no;
    int     contentIndent = -1;
    Bool    xhtmlOut = cfgBool( doc, TidyXhtmlOut );

    /* inside <head> the element always starts on a fresh line */
    if ( InsideHead(doc, node) )
      TY_(PFlushLine)( doc, indent );

    PPrintTag( doc, mode, indent, node );

    /* use zero indent here, see http://tidy.sf.net/bug/729972 */
    TY_(PFlushLine)(doc, 0);

    if ( xhtmlOut && node->content != NULL )
    {
        AttVal* type = attrGetTYPE(node);

        /* choose the comment syntax that hides the CDATA markers */
        if (AttrValueIs(type, "text/javascript"))
        {
            commentStart = JS_COMMENT_START;
            commentEnd = JS_COMMENT_END;
        }
        else if (AttrValueIs(type, "text/css"))
        {
            commentStart = CSS_COMMENT_START;
            commentEnd = CSS_COMMENT_END;
        }
        else if (AttrValueIs(type, "text/vbscript"))
        {
            commentStart = VB_COMMENT_START;
            commentEnd = VB_COMMENT_END;
        }

        /* only the first child is inspected for an existing CDATA marker */
        hasCData = HasCDATA(doc->lexer, node->content);

        if (!hasCData)
        {
            uint saveWrap = WrapOff( doc );

            /* opening marker, e.g. commentStart "<![CDATA[" commentEnd */
            AddString( pprint, commentStart );
            AddString( pprint, CDATA_START );
            AddString( pprint, commentEnd );
            PCondFlushLine( doc, indent );

            WrapOn( doc, saveWrap );
        }
    }

    for ( content = node->content;
          content != NULL;
          content = content->next )
    {
        /*
          This is a bit odd, with the current code there can only
          be one child and the only caller of this function defines
          all these modes already...
        */
        TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP | CDATA),
                         indent, content );

        /* remember whether the last child already ends with a newline */
        if ( content == node->last )
            contentIndent = TextEndsWithNewline( doc->lexer, content, CDATA );
    }

    /* content did not end with a newline: finish the pending line */
    if ( contentIndent < 0 )
    {
        PCondFlushLine( doc, indent );
        contentIndent = 0;
    }

    if ( xhtmlOut && node->content != NULL )
    {
        if ( ! hasCData )
        {
            uint saveWrap = WrapOff( doc );

            /* closing marker, mirroring the opening one above */
            AddString( pprint, commentStart );
            AddString( pprint, CDATA_END );
            AddString( pprint, commentEnd );

            WrapOn( doc, saveWrap );
            PCondFlushLine( doc, indent );
        }
    }

    /* make the end tag use the element's own indent */
    if ( node->content && pprint->indent[ 0 ].spaces != (int)indent )
    {
        pprint->indent[ 0 ].spaces = indent;
    }
    PPrintEndTag( doc, mode, indent, node );
    /* with indenting disabled, put a following sibling on a new line
       unless this node is inline or text */
    if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState
         && node->next != NULL &&
         !( TY_(nodeHasCM)(node, CM_INLINE) || TY_(nodeIsText)(node) ) )
        TY_(PFlushLine)( doc, indent );
}
1877
1878
1879
1880static Bool ShouldIndent( TidyDocImpl* doc, Node *node )
1881{
1882    TidyTriState indentContent = cfgAutoBool( doc, TidyIndentContent );
1883    if ( indentContent == TidyNoState )
1884        return no;
1885
1886    if ( nodeIsTEXTAREA(node) )
1887        return no;
1888
1889    if ( indentContent == TidyAutoState )
1890    {
1891        if ( node->content && TY_(nodeHasCM)(node, CM_NO_INDENT) )
1892        {
1893            for ( node = node->content; node; node = node->next )
1894                if ( TY_(nodeHasCM)(node, CM_BLOCK) )
1895                    return yes;
1896            return no;
1897        }
1898
1899        if ( TY_(nodeHasCM)(node, CM_HEADING) )
1900            return no;
1901
1902        if ( nodeIsHTML(node) )
1903            return no;
1904
1905        if ( nodeIsP(node) )
1906            return no;
1907
1908        if ( nodeIsTITLE(node) )
1909            return no;
1910    }
1911
1912    if ( TY_(nodeHasCM)(node, CM_FIELD | CM_OBJECT) )
1913        return yes;
1914
1915    if ( nodeIsMAP(node) )
1916        return yes;
1917
1918    return ( !TY_(nodeHasCM)( node, CM_INLINE ) && node->content );
1919}
1920
1921/*
1922 Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
1923 print just the content of the body element.
1924 useful when you want to reuse material from
1925 other documents.
1926
1927 -- Sebastiano Vigna <vigna@dsi.unimi.it>
1928*/
1929void TY_(PrintBody)( TidyDocImpl* doc )
1930{
1931    Node *node = TY_(FindBody)( doc );
1932
1933    if ( node )
1934    {
1935        for ( node = node->content; node != NULL; node = node->next )
1936            TY_(PPrintTree)( doc, NORMAL, 0, node );
1937    }
1938}
1939
1940void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
1941{
1942    Node *content, *last;
1943    uint spaces = cfg( doc, TidyIndentSpaces );
1944    Bool xhtml = cfgBool( doc, TidyXhtmlOut );
1945
1946    if ( node == NULL )
1947        return;
1948
1949    if (node->type == TextNode)
1950    {
1951        PPrintText( doc, mode, indent, node );
1952    }
1953    else if ( node->type == CommentTag )
1954    {
1955        PPrintComment( doc, indent, node );
1956    }
1957    else if ( node->type == RootNode )
1958    {
1959        for ( content = node->content; content; content = content->next )
1960           TY_(PPrintTree)( doc, mode, indent, content );
1961    }
1962    else if ( node->type == DocTypeTag )
1963        PPrintDocType( doc, indent, node );
1964    else if ( node->type == ProcInsTag)
1965        PPrintPI( doc, indent, node );
1966    else if ( node->type == XmlDecl)
1967        PPrintXmlDecl( doc, indent, node );
1968    else if ( node->type == CDATATag)
1969        PPrintCDATA( doc, indent, node );
1970    else if ( node->type == SectionTag)
1971        PPrintSection( doc, indent, node );
1972    else if ( node->type == AspTag)
1973        PPrintAsp( doc, indent, node );
1974    else if ( node->type == JsteTag)
1975        PPrintJste( doc, indent, node );
1976    else if ( node->type == PhpTag)
1977        PPrintPhp( doc, indent, node );
1978    else if ( TY_(nodeCMIsEmpty)(node) ||
1979              (node->type == StartEndTag && !xhtml) )
1980    {
1981        if ( ! TY_(nodeHasCM)(node, CM_INLINE) )
1982            PCondFlushLine( doc, indent );
1983
1984        if ( nodeIsBR(node) && node->prev &&
1985             !(nodeIsBR(node->prev) || (mode & PREFORMATTED)) &&
1986             cfgBool(doc, TidyBreakBeforeBR) )
1987            TY_(PFlushLine)( doc, indent );
1988
1989        if ( nodeIsHR(node) )
1990        {
1991            /* insert extra newline for classic formatting */
1992            Bool classic = cfgBool( doc, TidyVertSpace );
1993            if (classic && node->parent && node->parent->content != node)
1994            {
1995                TY_(PFlushLine)( doc, indent );
1996            }
1997        }
1998
1999        PPrintTag( doc, mode, indent, node );
2000
2001        if (node->next)
2002        {
2003          if (nodeIsPARAM(node) || nodeIsAREA(node))
2004              PCondFlushLine(doc, indent);
2005          else if ((nodeIsBR(node) && !(mode & PREFORMATTED))
2006                   || nodeIsHR(node))
2007              TY_(PFlushLine)(doc, indent);
2008        }
2009    }
2010    else /* some kind of container element */
2011    {
2012        if ( node->type == StartEndTag )
2013            node->type = StartTag;
2014
2015        if ( node->tag &&
2016             (node->tag->parser == TY_(ParsePre) || nodeIsTEXTAREA(node)) )
2017        {
2018            Bool classic  = cfgBool( doc, TidyVertSpace );
2019            uint indprev = indent;
2020            PCondFlushLine( doc, indent );
2021
2022            PCondFlushLine( doc, indent );
2023
2024            /* insert extra newline for classic formatting */
2025            if (classic && node->parent && node->parent->content != node)
2026            {
2027                TY_(PFlushLine)( doc, indent );
2028            }
2029            PPrintTag( doc, mode, indent, node );
2030
2031            indent = 0;
2032            TY_(PFlushLine)( doc, indent );
2033
2034            for ( content = node->content; content; content = content->next )
2035            {
2036                TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP),
2037                                 indent, content );
2038            }
2039            PCondFlushLine( doc, indent );
2040            indent = indprev;
2041            PPrintEndTag( doc, mode, indent, node );
2042
2043            if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState
2044                 && node->next != NULL )
2045                TY_(PFlushLine)( doc, indent );
2046        }
2047        else if ( nodeIsSTYLE(node) || nodeIsSCRIPT(node) )
2048        {
2049            PPrintScriptStyle( doc, (mode | PREFORMATTED | NOWRAP | CDATA),
2050                               indent, node );
2051        }
2052        else if ( TY_(nodeCMIsInline)(node) )
2053        {
2054            if ( cfgBool(doc, TidyMakeClean) )
2055            {
2056                /* replace <nobr>...</nobr> by &nbsp; or &#160; etc. */
2057                if ( nodeIsNOBR(node) )
2058                {
2059                    for ( content = node->content;
2060                          content != NULL;
2061                          content = content->next)
2062                        TY_(PPrintTree)( doc, mode|NOWRAP, indent, content );
2063                    return;
2064                }
2065            }
2066
2067            /* otherwise a normal inline element */
2068            PPrintTag( doc, mode, indent, node );
2069
2070            /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */
2071            if ( ShouldIndent(doc, node) )
2072            {
2073                indent += spaces;
2074                PCondFlushLine( doc, indent );
2075
2076                for ( content = node->content;
2077                      content != NULL;
2078                      content = content->next )
2079                    TY_(PPrintTree)( doc, mode, indent, content );
2080
2081                indent -= spaces;
2082                PCondFlushLine( doc, indent );
2083                /* PCondFlushLine( doc, indent ); */
2084            }
2085            else
2086            {
2087                for ( content = node->content;
2088                      content != NULL;
2089                      content = content->next )
2090                    TY_(PPrintTree)( doc, mode, indent, content );
2091            }
2092            PPrintEndTag( doc, mode, indent, node );
2093        }
2094        else /* other tags */
2095        {
2096            Bool indcont  = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState );
2097            Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState );
2098            Bool hideend  = cfgBool( doc, TidyHideEndTags );
2099            Bool classic  = cfgBool( doc, TidyVertSpace );
2100            uint contentIndent = indent;
2101
2102            /* insert extra newline for classic formatting */
2103            if (classic && node->parent && node->parent->content != node && !nodeIsHTML(node))
2104            {
2105                TY_(PFlushLine)( doc, indent );
2106            }
2107
2108            if ( ShouldIndent(doc, node) )
2109                contentIndent += spaces;
2110
2111            PCondFlushLine( doc, indent );
2112            if ( indsmart && node->prev != NULL )
2113                TY_(PFlushLine)( doc, indent );
2114
2115            /* do not omit elements with attributes */
2116            if ( !hideend || !TY_(nodeHasCM)(node, CM_OMITST) ||
2117                 node->attributes != NULL )
2118            {
2119                PPrintTag( doc, mode, indent, node );
2120
2121                if ( ShouldIndent(doc, node) )
2122                {
2123                    /* fix for bug 530791, don't wrap after */
2124                    /* <li> if first child is text node     */
2125                    if (!(nodeIsLI(node) && TY_(nodeIsText)(node->content)))
2126                        PCondFlushLine( doc, contentIndent );
2127                }
2128                else if ( TY_(nodeHasCM)(node, CM_HTML) || nodeIsNOFRAMES(node) ||
2129                          (TY_(nodeHasCM)(node, CM_HEAD) && !nodeIsTITLE(node)) )
2130                    TY_(PFlushLine)( doc, contentIndent );
2131            }
2132
2133            last = NULL;
2134            for ( content = node->content; content; content = content->next )
2135            {
2136                /* kludge for naked text before block level tag */
2137                if ( last && !indcont && TY_(nodeIsText)(last) &&
2138                     content->tag && !TY_(nodeHasCM)(content, CM_INLINE) )
2139                {
2140                    /* TY_(PFlushLine)(fout, indent); */
2141                    TY_(PFlushLine)( doc, contentIndent );
2142                }
2143
2144                TY_(PPrintTree)( doc, mode, contentIndent, content );
2145                last = content;
2146            }
2147
2148            /* don't flush line for td and th */
2149            if ( ShouldIndent(doc, node) ||
2150                 ( !hideend &&
2151                   ( TY_(nodeHasCM)(node, CM_HTML) ||
2152                     nodeIsNOFRAMES(node) ||
2153                     (TY_(nodeHasCM)(node, CM_HEAD) && !nodeIsTITLE(node))
2154                   )
2155                 )
2156               )
2157            {
2158                PCondFlushLine( doc, indent );
2159                if ( !hideend || !TY_(nodeHasCM)(node, CM_OPT) )
2160                {
2161                    PPrintEndTag( doc, mode, indent, node );
2162                    /* TY_(PFlushLine)( doc, indent ); */
2163                }
2164            }
2165            else
2166            {
2167                if ( !hideend || !TY_(nodeHasCM)(node, CM_OPT) )
2168                {
2169                    /* newline before endtag for classic formatting */
2170                    if ( classic && !HasMixedContent(node) )
2171                        TY_(PFlushLine)( doc, indent );
2172                    PPrintEndTag( doc, mode, indent, node );
2173                }
2174            }
2175
2176            if (!indcont && !hideend && !nodeIsHTML(node) && !classic)
2177                TY_(PFlushLine)( doc, indent );
2178            else if (classic && node->next != NULL && TY_(nodeHasCM)(node, CM_LIST|CM_DEFLIST|CM_TABLE|CM_BLOCK/*|CM_HEADING*/))
2179                TY_(PFlushLine)( doc, indent );
2180        }
2181    }
2182}
2183
2184void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node )
2185{
2186    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
2187    if (node == NULL)
2188        return;
2189
2190    if ( node->type == TextNode)
2191    {
2192        PPrintText( doc, mode, indent, node );
2193    }
2194    else if ( node->type == CommentTag )
2195    {
2196        PCondFlushLine( doc, indent );
2197        PPrintComment( doc, indent, node);
2198        /* PCondFlushLine( doc, 0 ); */
2199    }
2200    else if ( node->type == RootNode )
2201    {
2202        Node *content;
2203        for ( content = node->content;
2204              content != NULL;
2205              content = content->next )
2206           TY_(PPrintXMLTree)( doc, mode, indent, content );
2207    }
2208    else if ( node->type == DocTypeTag )
2209        PPrintDocType( doc, indent, node );
2210    else if ( node->type == ProcInsTag )
2211        PPrintPI( doc, indent, node );
2212    else if ( node->type == XmlDecl )
2213        PPrintXmlDecl( doc, indent, node );
2214    else if ( node->type == CDATATag )
2215        PPrintCDATA( doc, indent, node );
2216    else if ( node->type == SectionTag )
2217        PPrintSection( doc, indent, node );
2218    else if ( node->type == AspTag )
2219        PPrintAsp( doc, indent, node );
2220    else if ( node->type == JsteTag)
2221        PPrintJste( doc, indent, node );
2222    else if ( node->type == PhpTag)
2223        PPrintPhp( doc, indent, node );
2224    else if ( TY_(nodeHasCM)(node, CM_EMPTY) ||
2225              (node->type == StartEndTag && !xhtmlOut) )
2226    {
2227        PCondFlushLine( doc, indent );
2228        PPrintTag( doc, mode, indent, node );
2229        /* TY_(PFlushLine)( doc, indent ); */
2230    }
2231    else /* some kind of container element */
2232    {
2233        uint spaces = cfg( doc, TidyIndentSpaces );
2234        Node *content;
2235        Bool mixed = no;
2236        uint cindent;
2237
2238        for ( content = node->content; content; content = content->next )
2239        {
2240            if ( TY_(nodeIsText)(content) )
2241            {
2242                mixed = yes;
2243                break;
2244            }
2245        }
2246
2247        PCondFlushLine( doc, indent );
2248
2249        if ( TY_(XMLPreserveWhiteSpace)(doc, node) )
2250        {
2251            indent = 0;
2252            mixed = no;
2253            cindent = 0;
2254        }
2255        else if (mixed)
2256            cindent = indent;
2257        else
2258            cindent = indent + spaces;
2259
2260        PPrintTag( doc, mode, indent, node );
2261        if ( !mixed && node->content )
2262            TY_(PFlushLine)( doc, cindent );
2263
2264        for ( content = node->content; content; content = content->next )
2265            TY_(PPrintXMLTree)( doc, mode, cindent, content );
2266
2267        if ( !mixed && node->content )
2268            PCondFlushLine( doc, indent );
2269
2270        PPrintEndTag( doc, mode, indent, node );
2271        /* PCondFlushLine( doc, indent ); */
2272    }
2273}
2274
2275/*
2276 * local variables:
2277 * mode: c
2278 * indent-tabs-mode: nil
2279 * c-basic-offset: 4
2280 * eval: (c-set-offset 'substatement-open 0)
2281 * end:
2282 */
2283