/*
  pprint.c -- pretty print parse tree

  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  CVS Info :

    $Author: iccir $
    $Date: 2007/02/08 06:24:33 $
    $Revision: 1.4 $

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pprint.h"
#include "tidy-int.h"
#include "parser.h"
#include "entities.h"
#include "tmbstr.h"
#include "utf8.h"

/*
  Block-level and unknown elements are printed on
  new lines and their contents indented 2 spaces.

  Inline elements are printed inline.

  Inline content is wrapped on spaces (except in
  attribute values or preformatted text), after
  start tags and before end tags.
*/

/*
  Forward declarations of file-local helpers that are used
  before their definitions appear.
*/
static void PPrintAsp( TidyDocImpl* doc, uint indent, Node* node );
static void PPrintJste( TidyDocImpl* doc, uint indent, Node* node );
static void PPrintPhp( TidyDocImpl* doc, uint indent, Node* node );
static int  TextEndsWithNewline( Lexer *lexer, Node *node, uint mode );
static int  TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode );
static Bool InsideHead( TidyDocImpl* doc, Node *node );
static Bool ShouldIndent( TidyDocImpl* doc, Node *node );

#if SUPPORT_ASIAN_ENCODINGS
/* #431953 - start RJ Wraplen adjusted for smooth international ride */

/*
  Disabled: computes a reduced wrap column for Chinese ("zh") and
  Japanese ("ja") documents.  Kept for reference only; it is not
  compiled.
*/
#if 0
uint CWrapLen( TidyDocImpl* doc, uint ind )
{
    ctmbstr lang = cfgStr( doc, TidyLanguage );
    uint wraplen = cfg( doc, TidyWrapLen );

    if ( !TY_(tmbstrcasecmp)(lang, "zh") )
        /* Chinese characters take two positions on a fixed-width screen */
        /* It would be more accurate to keep a parallel linelen and wraphere
           incremented by 2 for Chinese characters and 1 otherwise, but this
           is way simpler.
        */
        return (ind + (( wraplen - ind ) / 2)) ;

    if ( !TY_(tmbstrcasecmp)(lang, "ja") )
        /* average Japanese text is 30% kanji */
        return (ind + ((( wraplen - ind ) * 7) / 10)) ;

    return wraplen;
}
#endif

/*
  Subset of the Unicode general categories used by the wrap-point
  table: the punctuation (Px) and separator (Zx) classes only.
*/
typedef enum
{
  UC00, /* None                       */
  UCPC, /* Punctuation, Connector     */
  UCPD, /* Punctuation, Dash          */
  UCPE, /* Punctuation, Close         */
  UCPS, /* Punctuation, Open          */
  UCPI, /* Punctuation, Initial quote */
  UCPF, /* Punctuation, Final quote   */
  UCPO, /* Punctuation, Other         */
  UCZS, /* Separator, Space           */
  UCZL, /* Separator, Line            */
  UCZP  /* Separator, Paragraph       */
} UnicodeCategory;

/*
  From the original code, the following characters are removed:

    U+2011 (non-breaking hyphen)
    U+202F (narrow non-break space)
    U+2044 (fraction slash)
    U+200B (zero width space)
    ...... (bidi formatting control characters)

  U+2011 and U+202F are non-breaking, U+2044 is a Sm character,
  U+200B is a non-visible space, wrapping after it would make
  this space visible, bidi should be done using HTML features
  and the characters are neither Px or Zx.

  The following Unicode 3.0 punctuation characters are added:

    U+2048 (question exclamation mark)
    U+2049 (exclamation question mark)
    U+204A (tironian sign et)
    U+204B (reversed pilcrow sign)
    U+204C (black leftwards bullet)
    U+204D (black rightwards bullet)
    U+3030 (wavy dash)
    U+30FB (katakana middle dot)
    U+FE63 (small hyphen-minus)
    U+FE68 (small reverse solidus)
    U+FF3F (fullwidth low line)
    U+FF5B (fullwidth left curly bracket)
    U+FF5D (fullwidth right curly bracket)

  Other additional characters were not included in Unicode 3.0.
  The table is based on Unicode 4.0. It must include only those
  characters marking a wrapping point, "before" if the general
  category is UCPS or UCPI, otherwise "after".
*/
/*
  Wrap-point lookup table: one entry per code point that marks a
  possible line break, paired with its general category.

  Entries are listed in ascending code order and the table ends with
  an entry whose code is zero; CharacterWrapPoint() depends on both
  (it stops at the zero code and as soon as an entry's code exceeds
  the character looked up).  Keep new entries sorted.

  The first group of entries is compiled out with #if 0.
*/
static struct _unicode4cat
{
  unsigned long code;
  UnicodeCategory category;
} const unicode4cat[] =
{
#if 0
  { 0x037E, UCPO }, { 0x0387, UCPO }, { 0x055A, UCPO }, { 0x055B, UCPO },
  { 0x055C, UCPO }, { 0x055D, UCPO }, { 0x055E, UCPO }, { 0x055F, UCPO },
  { 0x0589, UCPO }, { 0x058A, UCPD }, { 0x05BE, UCPO }, { 0x05C0, UCPO },
  { 0x05C3, UCPO }, { 0x05F3, UCPO }, { 0x05F4, UCPO }, { 0x060C, UCPO },
  { 0x060D, UCPO }, { 0x061B, UCPO }, { 0x061F, UCPO }, { 0x066A, UCPO },
  { 0x066B, UCPO }, { 0x066C, UCPO }, { 0x066D, UCPO }, { 0x06D4, UCPO },
  { 0x0700, UCPO }, { 0x0701, UCPO }, { 0x0702, UCPO }, { 0x0703, UCPO },
  { 0x0704, UCPO }, { 0x0705, UCPO }, { 0x0706, UCPO }, { 0x0707, UCPO },
  { 0x0708, UCPO }, { 0x0709, UCPO }, { 0x070A, UCPO }, { 0x070B, UCPO },
  { 0x070C, UCPO }, { 0x070D, UCPO }, { 0x0964, UCPO }, { 0x0965, UCPO },
  { 0x0970, UCPO }, { 0x0DF4, UCPO }, { 0x0E4F, UCPO }, { 0x0E5A, UCPO },
  { 0x0E5B, UCPO }, { 0x0F04, UCPO }, { 0x0F05, UCPO }, { 0x0F06, UCPO },
  { 0x0F07, UCPO }, { 0x0F08, UCPO }, { 0x0F09, UCPO }, { 0x0F0A, UCPO },
  { 0x0F0B, UCPO }, { 0x0F0D, UCPO }, { 0x0F0E, UCPO }, { 0x0F0F, UCPO },
  { 0x0F10, UCPO }, { 0x0F11, UCPO }, { 0x0F12, UCPO }, { 0x0F3A, UCPS },
  { 0x0F3B, UCPE }, { 0x0F3C, UCPS }, { 0x0F3D, UCPE }, { 0x0F85, UCPO },
  { 0x104A, UCPO }, { 0x104B, UCPO }, { 0x104C, UCPO }, { 0x104D, UCPO },
  { 0x104E, UCPO }, { 0x104F, UCPO }, { 0x10FB, UCPO }, { 0x1361, UCPO },
  { 0x1362, UCPO }, { 0x1363, UCPO }, { 0x1364, UCPO }, { 0x1365, UCPO },
  { 0x1366, UCPO }, { 0x1367, UCPO }, { 0x1368, UCPO }, { 0x166D, UCPO },
  { 0x166E, UCPO }, { 0x1680, UCZS }, { 0x169B, UCPS }, { 0x169C, UCPE },
  { 0x16EB, UCPO }, { 0x16EC, UCPO }, { 0x16ED, UCPO }, { 0x1735, UCPO },
  { 0x1736, UCPO }, { 0x17D4, UCPO }, { 0x17D5, UCPO }, { 0x17D6, UCPO },
  { 0x17D8, UCPO }, { 0x17D9, UCPO }, { 0x17DA, UCPO }, { 0x1800, UCPO },
  { 0x1801, UCPO }, { 0x1802, UCPO }, { 0x1803, UCPO }, { 0x1804, UCPO },
  { 0x1805, UCPO }, { 0x1806, UCPD }, { 0x1807, UCPO }, { 0x1808, UCPO },
  { 0x1809, UCPO }, { 0x180A, UCPO }, { 0x180E, UCZS }, { 0x1944, UCPO },
  { 0x1945, UCPO },
#endif
  { 0x2000, UCZS }, { 0x2001, UCZS }, { 0x2002, UCZS }, { 0x2003, UCZS },
  { 0x2004, UCZS }, { 0x2005, UCZS }, { 0x2006, UCZS }, { 0x2008, UCZS },
  { 0x2009, UCZS }, { 0x200A, UCZS }, { 0x2010, UCPD }, { 0x2012, UCPD },
  { 0x2013, UCPD }, { 0x2014, UCPD }, { 0x2015, UCPD }, { 0x2016, UCPO },
  { 0x2017, UCPO }, { 0x2018, UCPI }, { 0x2019, UCPF }, { 0x201A, UCPS },
  { 0x201B, UCPI }, { 0x201C, UCPI }, { 0x201D, UCPF }, { 0x201E, UCPS },
  { 0x201F, UCPI }, { 0x2020, UCPO }, { 0x2021, UCPO }, { 0x2022, UCPO },
  { 0x2023, UCPO }, { 0x2024, UCPO }, { 0x2025, UCPO }, { 0x2026, UCPO },
  { 0x2027, UCPO }, { 0x2028, UCZL }, { 0x2029, UCZP }, { 0x2030, UCPO },
  { 0x2031, UCPO }, { 0x2032, UCPO }, { 0x2033, UCPO }, { 0x2034, UCPO },
  { 0x2035, UCPO }, { 0x2036, UCPO }, { 0x2037, UCPO }, { 0x2038, UCPO },
  { 0x2039, UCPI }, { 0x203A, UCPF }, { 0x203B, UCPO }, { 0x203C, UCPO },
  { 0x203D, UCPO }, { 0x203E, UCPO }, { 0x203F, UCPC }, { 0x2040, UCPC },
  { 0x2041, UCPO }, { 0x2042, UCPO }, { 0x2043, UCPO }, { 0x2045, UCPS },
  { 0x2046, UCPE }, { 0x2047, UCPO }, { 0x2048, UCPO }, { 0x2049, UCPO },
  { 0x204A, UCPO }, { 0x204B, UCPO }, { 0x204C, UCPO }, { 0x204D, UCPO },
  { 0x204E, UCPO }, { 0x204F, UCPO }, { 0x2050, UCPO }, { 0x2051, UCPO },
  { 0x2053, UCPO }, { 0x2054, UCPC }, { 0x2057, UCPO }, { 0x205F, UCZS },
  { 0x207D, UCPS }, { 0x207E, UCPE }, { 0x208D, UCPS }, { 0x208E, UCPE },
  { 0x2329, UCPS }, { 0x232A, UCPE }, { 0x23B4, UCPS }, { 0x23B5, UCPE },
  { 0x23B6, UCPO }, { 0x2768, UCPS }, { 0x2769, UCPE }, { 0x276A, UCPS },
  { 0x276B, UCPE }, { 0x276C, UCPS }, { 0x276D, UCPE }, { 0x276E, UCPS },
  { 0x276F, UCPE }, { 0x2770, UCPS }, { 0x2771, UCPE }, { 0x2772, UCPS },
  { 0x2773, UCPE }, { 0x2774, UCPS }, { 0x2775, UCPE }, { 0x27E6, UCPS },
  { 0x27E7, UCPE }, { 0x27E8, UCPS }, { 0x27E9, UCPE }, { 0x27EA, UCPS },
  { 0x27EB, UCPE }, { 0x2983, UCPS }, { 0x2984, UCPE }, { 0x2985, UCPS },
  { 0x2986, UCPE }, { 0x2987, UCPS }, { 0x2988, UCPE }, { 0x2989, UCPS },
  { 0x298A, UCPE }, { 0x298B, UCPS }, { 0x298C, UCPE }, { 0x298D, UCPS },
  { 0x298E, UCPE }, { 0x298F, UCPS }, { 0x2990, UCPE }, { 0x2991, UCPS },
  { 0x2992, UCPE }, { 0x2993, UCPS }, { 0x2994, UCPE }, { 0x2995, UCPS },
  { 0x2996, UCPE }, { 0x2997, UCPS }, { 0x2998, UCPE }, { 0x29D8, UCPS },
  { 0x29D9, UCPE }, { 0x29DA, UCPS }, { 0x29DB, UCPE }, { 0x29FC, UCPS },
  { 0x29FD, UCPE }, { 0x3001, UCPO }, { 0x3002, UCPO }, { 0x3003, UCPO },
  { 0x3008, UCPS }, { 0x3009, UCPE }, { 0x300A, UCPS }, { 0x300B, UCPE },
  { 0x300C, UCPS }, { 0x300D, UCPE }, { 0x300E, UCPS }, { 0x300F, UCPE },
  { 0x3010, UCPS }, { 0x3011, UCPE }, { 0x3014, UCPS }, { 0x3015, UCPE },
  { 0x3016, UCPS }, { 0x3017, UCPE }, { 0x3018, UCPS }, { 0x3019, UCPE },
  { 0x301A, UCPS }, { 0x301B, UCPE }, { 0x301C, UCPD }, { 0x301D, UCPS },
  { 0x301E, UCPE }, { 0x301F, UCPE }, { 0x3030, UCPD }, { 0x303D, UCPO },
  { 0x30A0, UCPD }, { 0x30FB, UCPC }, { 0xFD3E, UCPS }, { 0xFD3F, UCPE },
  { 0xFE30, UCPO }, { 0xFE31, UCPD }, { 0xFE32, UCPD }, { 0xFE33, UCPC },
  { 0xFE34, UCPC }, { 0xFE35, UCPS }, { 0xFE36, UCPE }, { 0xFE37, UCPS },
  { 0xFE38, UCPE }, { 0xFE39, UCPS }, { 0xFE3A, UCPE }, { 0xFE3B, UCPS },
  { 0xFE3C, UCPE }, { 0xFE3D, UCPS }, { 0xFE3E, UCPE }, { 0xFE3F, UCPS },
  { 0xFE40, UCPE }, { 0xFE41, UCPS }, { 0xFE42, UCPE }, { 0xFE43, UCPS },
  { 0xFE44, UCPE }, { 0xFE45, UCPO }, { 0xFE46, UCPO }, { 0xFE47, UCPS },
  { 0xFE48, UCPE }, { 0xFE49, UCPO }, { 0xFE4A, UCPO }, { 0xFE4B, UCPO },
  { 0xFE4C, UCPO }, { 0xFE4D, UCPC }, { 0xFE4E, UCPC }, { 0xFE4F, UCPC },
  { 0xFE50, UCPO }, { 0xFE51, UCPO }, { 0xFE52, UCPO }, { 0xFE54, UCPO },
  { 0xFE55, UCPO }, { 0xFE56, UCPO }, { 0xFE57, UCPO }, { 0xFE58, UCPD },
  { 0xFE59, UCPS }, { 0xFE5A, UCPE }, { 0xFE5B, UCPS }, { 0xFE5C, UCPE },
  { 0xFE5D, UCPS }, { 0xFE5E, UCPE }, { 0xFE5F, UCPO }, { 0xFE60, UCPO },
  { 0xFE61, UCPO }, { 0xFE63, UCPD }, { 0xFE68, UCPO }, { 0xFE6A, UCPO },
  { 0xFE6B, UCPO }, { 0xFF01, UCPO }, { 0xFF02, UCPO }, { 0xFF03, UCPO },
  { 0xFF05, UCPO }, { 0xFF06, UCPO }, { 0xFF07, UCPO }, { 0xFF08, UCPS },
  { 0xFF09, UCPE }, { 0xFF0A, UCPO }, { 0xFF0C, UCPO }, { 0xFF0D, UCPD },
  { 0xFF0E, UCPO }, { 0xFF0F, UCPO }, { 0xFF1A, UCPO }, { 0xFF1B, UCPO },
  { 0xFF1F, UCPO }, { 0xFF20, UCPO }, { 0xFF3B, UCPS }, { 0xFF3C, UCPO },
  { 0xFF3D, UCPE }, { 0xFF3F, UCPC }, { 0xFF5B, UCPS }, { 0xFF5D, UCPE },
  { 0xFF5F, UCPS }, { 0xFF60, UCPE }, { 0xFF61, UCPO }, { 0xFF62, UCPS },
  { 0xFF63, UCPE }, { 0xFF64, UCPO }, { 0xFF65, UCPC }, { 0x10100,UCPO },
  { 0x10101,UCPO }, { 0x1039F,UCPO },

  /* final entry: a zero code terminates the table */
  { 0x0000, UC00 }
};

/* Where, relative to a character, a line break may be inserted. */
typedef enum
{
    NoWrapPoint,   /* character does not mark a wrap point    */
    WrapBefore,    /* a break may go in front of the character */
    WrapAfter      /* a break may go right after the character */
} WrapPoint;

/*
  If long lines of text have no white space as defined in HTML 4
  (U+0009, U+000A, U+000D, U+000C, U+0020) other characters could
  be used to determine a wrap point. Since user agents would
  normalize the inserted newline character to a space character,
  this wrapping behaviour would insert visual whitespace into the
  document.

  Characters of the General Category Pi and Ps in the Unicode
  character database (opening punctuation and initial quote
  characters) mark a wrapping point before the character, other
  punctuation characters (Pc, Pd, Pe, Pf, and Po), breakable
  space characters (Zs), and paragraph and line separators
  (Zl, Zp) mark a wrap point after the character.
Using this 244 function Tidy can for example pretty print 245 246 <p>....................“...quote...”...</p> 247 as 248 <p>....................\n“...quote...”...</p> 249 or 250 <p>....................“...quote...”\n...</p> 251 252 if the next normal wrapping point would exceed the user 253 chosen wrapping column. 254*/ 255static WrapPoint CharacterWrapPoint(tchar c) 256{ 257 int i; 258 for (i = 0; unicode4cat[i].code && unicode4cat[i].code <= c; ++i) 259 if (unicode4cat[i].code == c) 260 /* wrapping before opening punctuation and initial quotes */ 261 if (unicode4cat[i].category == UCPS || 262 unicode4cat[i].category == UCPI) 263 return WrapBefore; 264 /* else wrapping after this character */ 265 else 266 return WrapAfter; 267 /* character has no effect on line wrapping */ 268 return NoWrapPoint; 269} 270 271static WrapPoint Big5WrapPoint(tchar c) 272{ 273 if ((c & 0xFF00) == 0xA100) 274 { 275 /* opening brackets have odd codes: break before them */ 276 if ( c > 0xA15C && c < 0xA1AD && (c & 1) == 1 ) 277 return WrapBefore; 278 return WrapAfter; 279 } 280 return NoWrapPoint; 281} 282 283#endif /* SUPPORT_ASIAN_ENCODINGS */ 284 285static void InitIndent( TidyIndent* ind ) 286{ 287 ind->spaces = -1; 288 ind->attrValStart = -1; 289 ind->attrStringStart = -1; 290} 291 292void TY_(InitPrintBuf)( TidyDocImpl* doc ) 293{ 294 ClearMemory( &doc->pprint, sizeof(TidyPrintImpl) ); 295 InitIndent( &doc->pprint.indent[0] ); 296 InitIndent( &doc->pprint.indent[1] ); 297} 298 299void TY_(FreePrintBuf)( TidyDocImpl* doc ) 300{ 301 MemFree( doc->pprint.linebuf ); 302 TY_(InitPrintBuf)( doc ); 303} 304 305static void expand( TidyPrintImpl* pprint, uint len ) 306{ 307 uint* ip; 308 uint buflen = pprint->lbufsize; 309 310 if ( buflen == 0 ) 311 buflen = 256; 312 while ( len >= buflen ) 313 buflen *= 2; 314 315 ip = (uint*) MemRealloc( pprint->linebuf, buflen*sizeof(uint) ); 316 if ( ip ) 317 { 318 ClearMemory( ip+pprint->lbufsize, 319 (buflen-pprint->lbufsize)*sizeof(uint) ); 320 
pprint->lbufsize = buflen; 321 pprint->linebuf = ip; 322 } 323} 324 325static uint GetSpaces( TidyPrintImpl* pprint ) 326{ 327 int spaces = pprint->indent[ 0 ].spaces; 328 return ( spaces < 0 ? 0U : (uint) spaces ); 329} 330static int ClearInString( TidyPrintImpl* pprint ) 331{ 332 TidyIndent *ind = pprint->indent + pprint->ixInd; 333 return ind->attrStringStart = -1; 334} 335static int ToggleInString( TidyPrintImpl* pprint ) 336{ 337 TidyIndent *ind = pprint->indent + pprint->ixInd; 338 Bool inString = ( ind->attrStringStart >= 0 ); 339 return ind->attrStringStart = ( inString ? -1 : (int) pprint->linelen ); 340} 341static Bool IsInString( TidyPrintImpl* pprint ) 342{ 343 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 344 return ( ind->attrStringStart >= 0 && 345 ind->attrStringStart < (int) pprint->linelen ); 346} 347static Bool IsWrapInString( TidyPrintImpl* pprint ) 348{ 349 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 350 int wrap = (int) pprint->wraphere; 351 return ( ind->attrStringStart == 0 || 352 (ind->attrStringStart > 0 && ind->attrStringStart < wrap) ); 353} 354 355static Bool HasMixedContent (Node *element) 356{ 357 Node * node; 358 359 if (!element) 360 return no; 361 362 for (node = element->content; node; node = node->next) 363 if ( TY_(nodeIsText)(node) ) 364 return yes; 365 366 return no; 367} 368 369static void ClearInAttrVal( TidyPrintImpl* pprint ) 370{ 371 TidyIndent *ind = pprint->indent + pprint->ixInd; 372 ind->attrValStart = -1; 373} 374static int SetInAttrVal( TidyPrintImpl* pprint ) 375{ 376 TidyIndent *ind = pprint->indent + pprint->ixInd; 377 return ind->attrValStart = (int) pprint->linelen; 378} 379static Bool IsWrapInAttrVal( TidyPrintImpl* pprint ) 380{ 381 TidyIndent *ind = pprint->indent + 0; /* Always 1st */ 382 int wrap = (int) pprint->wraphere; 383 return ( ind->attrValStart == 0 || 384 (ind->attrValStart > 0 && ind->attrValStart < wrap) ); 385} 386 387static Bool WantIndent( TidyDocImpl* doc ) 388{ 389 
TidyPrintImpl* pprint = &doc->pprint; 390 Bool wantIt = GetSpaces(pprint) > 0; 391 if ( wantIt ) 392 { 393 Bool indentAttrs = cfgBool( doc, TidyIndentAttributes ); 394 wantIt = ( ( !IsWrapInAttrVal(pprint) || indentAttrs ) && 395 !IsWrapInString(pprint) ); 396 } 397 return wantIt; 398} 399 400 401static uint WrapOff( TidyDocImpl* doc ) 402{ 403 uint saveWrap = cfg( doc, TidyWrapLen ); 404 TY_(SetOptionInt)( doc, TidyWrapLen, 0xFFFFFFFF ); /* very large number */ 405 return saveWrap; 406} 407 408static void WrapOn( TidyDocImpl* doc, uint saveWrap ) 409{ 410 TY_(SetOptionInt)( doc, TidyWrapLen, saveWrap ); 411} 412 413static uint WrapOffCond( TidyDocImpl* doc, Bool onoff ) 414{ 415 if ( onoff ) 416 return WrapOff( doc ); 417 return cfg( doc, TidyWrapLen ); 418} 419 420 421static void AddC( TidyPrintImpl* pprint, uint c, uint string_index) 422{ 423 if ( string_index + 1 >= pprint->lbufsize ) 424 expand( pprint, string_index + 1 ); 425 pprint->linebuf[string_index] = c; 426} 427 428static uint AddChar( TidyPrintImpl* pprint, uint c ) 429{ 430 AddC( pprint, c, pprint->linelen ); 431 return ++pprint->linelen; 432} 433 434static uint AddAsciiString( TidyPrintImpl* pprint, ctmbstr str, uint string_index ) 435{ 436 uint ix, len = TY_(tmbstrlen)( str ); 437 if ( string_index + len >= pprint->lbufsize ) 438 expand( pprint, string_index + len ); 439 440 for ( ix=0; ix<len; ++ix ) 441 pprint->linebuf[string_index + ix] = str[ ix ]; 442 return string_index + len; 443} 444 445static uint AddString( TidyPrintImpl* pprint, ctmbstr str ) 446{ 447 return pprint->linelen = AddAsciiString( pprint, str, pprint->linelen ); 448} 449 450/* Saves current output point as the wrap point, 451** but only if indentation would NOT overflow 452** the current line. Otherwise keep previous wrap point. 
453*/ 454static Bool SetWrap( TidyDocImpl* doc, uint indent ) 455{ 456 TidyPrintImpl* pprint = &doc->pprint; 457 Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) ); 458 if ( wrap ) 459 { 460 if ( pprint->indent[0].spaces < 0 ) 461 pprint->indent[0].spaces = indent; 462 pprint->wraphere = pprint->linelen; 463 } 464 else if ( pprint->ixInd == 0 ) 465 { 466 /* Save indent 1st time we pass the the wrap line */ 467 pprint->indent[ 1 ].spaces = indent; 468 pprint->ixInd = 1; 469 } 470 return wrap; 471} 472 473static void CarryOver( int* valTo, int* valFrom, uint wrapPoint ) 474{ 475 if ( *valFrom > (int) wrapPoint ) 476 { 477 *valTo = *valFrom - wrapPoint; 478 *valFrom = -1; 479 } 480} 481 482 483static Bool SetWrapAttr( TidyDocImpl* doc, 484 uint indent, int attrStart, int strStart ) 485{ 486 TidyPrintImpl* pprint = &doc->pprint; 487 TidyIndent *ind = pprint->indent + 0; 488 489 Bool wrap = ( indent + pprint->linelen < cfg(doc, TidyWrapLen) ); 490 if ( wrap ) 491 { 492 if ( ind[0].spaces < 0 ) 493 ind[0].spaces = indent; 494 pprint->wraphere = pprint->linelen; 495 } 496 else if ( pprint->ixInd == 0 ) 497 { 498 /* Save indent 1st time we pass the the wrap line */ 499 pprint->indent[ 1 ].spaces = indent; 500 pprint->ixInd = 1; 501 502 /* Carry over string state */ 503 CarryOver( &ind[1].attrStringStart, &ind[0].attrStringStart, pprint->wraphere ); 504 CarryOver( &ind[1].attrValStart, &ind[0].attrValStart, pprint->wraphere ); 505 } 506 ind += doc->pprint.ixInd; 507 ind->attrValStart = attrStart; 508 ind->attrStringStart = strStart; 509 return wrap; 510} 511 512 513/* Reset indent state after flushing a new line 514*/ 515static void ResetLine( TidyPrintImpl* pprint ) 516{ 517 TidyIndent* ind = pprint->indent + 0; 518 if ( pprint->ixInd > 0 ) 519 { 520 ind[0] = ind[1]; 521 InitIndent( &ind[1] ); 522 } 523 524 if ( pprint->wraphere > 0 ) 525 { 526 int wrap = (int) pprint->wraphere; 527 if ( ind[0].attrStringStart > wrap ) 528 ind[0].attrStringStart -= wrap; 529 
if ( ind[0].attrValStart > wrap ) 530 ind[0].attrValStart -= wrap; 531 } 532 else 533 { 534 if ( ind[0].attrStringStart > 0 ) 535 ind[0].attrStringStart = 0; 536 if ( ind[0].attrValStart > 0 ) 537 ind[0].attrValStart = 0; 538 } 539 pprint->wraphere = pprint->ixInd = 0; 540} 541 542/* Shift text after wrap point to 543** beginning of next line. 544*/ 545static void ResetLineAfterWrap( TidyPrintImpl* pprint ) 546{ 547 if ( pprint->linelen > pprint->wraphere ) 548 { 549 uint *p = pprint->linebuf; 550 uint *q = p + pprint->wraphere; 551 uint *end = p + pprint->linelen; 552 553 if ( ! IsWrapInAttrVal(pprint) ) 554 { 555 while ( q < end && *q == ' ' ) 556 ++q, ++pprint->wraphere; 557 } 558 559 while ( q < end ) 560 *p++ = *q++; 561 562 pprint->linelen -= pprint->wraphere; 563 } 564 else 565 { 566 pprint->linelen = 0; 567 } 568 569 ResetLine( pprint ); 570} 571 572/* Goes ahead with writing current line up to 573** previously saved wrap point. Shifts unwritten 574** text in output buffer to beginning of next line. 575*/ 576static void WrapLine( TidyDocImpl* doc ) 577{ 578 TidyPrintImpl* pprint = &doc->pprint; 579 uint i; 580 581 if ( pprint->wraphere == 0 ) 582 return; 583 584 if ( WantIndent(doc) ) 585 { 586 uint spaces = GetSpaces( pprint ); 587 for ( i = 0; i < spaces; ++i ) 588 TY_(WriteChar)( ' ', doc->docOut ); 589 } 590 591 for ( i = 0; i < pprint->wraphere; ++i ) 592 TY_(WriteChar)( pprint->linebuf[i], doc->docOut ); 593 594 if ( IsWrapInString(pprint) ) 595 TY_(WriteChar)( '\\', doc->docOut ); 596 597 TY_(WriteChar)( '\n', doc->docOut ); 598 ResetLineAfterWrap( pprint ); 599} 600 601/* Checks current output line length along with current indent. 602** If combined they overflow output line length, go ahead 603** and flush output up to the current wrap point. 
604*/ 605static Bool CheckWrapLine( TidyDocImpl* doc ) 606{ 607 TidyPrintImpl* pprint = &doc->pprint; 608 if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) ) 609 { 610 WrapLine( doc ); 611 return yes; 612 } 613 return no; 614} 615 616static Bool CheckWrapIndent( TidyDocImpl* doc, uint indent ) 617{ 618 TidyPrintImpl* pprint = &doc->pprint; 619 if ( GetSpaces(pprint) + pprint->linelen >= cfg(doc, TidyWrapLen) ) 620 { 621 WrapLine( doc ); 622 if ( pprint->indent[ 0 ].spaces < 0 ) 623 pprint->indent[ 0 ].spaces = indent; 624 return yes; 625 } 626 return no; 627} 628 629static void WrapAttrVal( TidyDocImpl* doc ) 630{ 631 TidyPrintImpl* pprint = &doc->pprint; 632 uint i; 633 634 /* assert( IsWrapInAttrVal(pprint) ); */ 635 if ( WantIndent(doc) ) 636 { 637 uint spaces = GetSpaces( pprint ); 638 for ( i = 0; i < spaces; ++i ) 639 TY_(WriteChar)( ' ', doc->docOut ); 640 } 641 642 for ( i = 0; i < pprint->wraphere; ++i ) 643 TY_(WriteChar)( pprint->linebuf[i], doc->docOut ); 644 645 if ( IsWrapInString(pprint) ) 646 TY_(WriteChar)( '\\', doc->docOut ); 647 else 648 TY_(WriteChar)( ' ', doc->docOut ); 649 650 TY_(WriteChar)( '\n', doc->docOut ); 651 ResetLineAfterWrap( pprint ); 652} 653 654void TY_(PFlushLine)( TidyDocImpl* doc, uint indent ) 655{ 656 TidyPrintImpl* pprint = &doc->pprint; 657 658 if ( pprint->linelen > 0 ) 659 { 660 uint i; 661 662 CheckWrapLine( doc ); 663 664 if ( WantIndent(doc) ) 665 { 666 uint spaces = GetSpaces( pprint ); 667 for ( i = 0; i < spaces; ++i ) 668 TY_(WriteChar)( ' ', doc->docOut ); 669 } 670 671 for ( i = 0; i < pprint->linelen; ++i ) 672 TY_(WriteChar)( pprint->linebuf[i], doc->docOut ); 673 674 if ( IsInString(pprint) ) 675 TY_(WriteChar)( '\\', doc->docOut ); 676 ResetLine( pprint ); 677 pprint->linelen = 0; 678 } 679 680 TY_(WriteChar)( '\n', doc->docOut ); 681 pprint->indent[ 0 ].spaces = indent; 682} 683 684static void PCondFlushLine( TidyDocImpl* doc, uint indent ) 685{ 686 TidyPrintImpl* pprint = &doc->pprint; 
687 if ( pprint->linelen > 0 ) 688 { 689 uint i; 690 691 CheckWrapLine( doc ); 692 693 if ( WantIndent(doc) ) 694 { 695 uint spaces = GetSpaces( pprint ); 696 for ( i = 0; i < spaces; ++i ) 697 TY_(WriteChar)(' ', doc->docOut); 698 } 699 700 for ( i = 0; i < pprint->linelen; ++i ) 701 TY_(WriteChar)( pprint->linebuf[i], doc->docOut ); 702 703 if ( IsInString(pprint) ) 704 TY_(WriteChar)( '\\', doc->docOut ); 705 ResetLine( pprint ); 706 707 TY_(WriteChar)( '\n', doc->docOut ); 708 pprint->indent[ 0 ].spaces = indent; 709 pprint->linelen = 0; 710 } 711} 712 713static void PPrintChar( TidyDocImpl* doc, uint c, uint mode ) 714{ 715 tmbchar entity[128]; 716 ctmbstr p; 717 TidyPrintImpl* pprint = &doc->pprint; 718 uint outenc = cfg( doc, TidyOutCharEncoding ); 719 Bool qmark = cfgBool( doc, TidyQuoteMarks ); 720 721 if ( c == ' ' && !(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE | CDATA))) 722 { 723 /* coerce a space character to a non-breaking space */ 724 if (mode & NOWRAP) 725 { 726 ctmbstr ent = " "; 727 /* by default XML doesn't define */ 728 if ( cfgBool(doc, TidyNumEntities) || cfgBool(doc, TidyXmlTags) ) 729 ent = " "; 730 AddString( pprint, ent ); 731 return; 732 } 733 else 734 pprint->wraphere = pprint->linelen; 735 } 736 737 /* comment characters are passed raw */ 738 if ( mode & (COMMENT | CDATA) ) 739 { 740 AddChar( pprint, c ); 741 return; 742 } 743 744 /* except in CDATA map < to < etc. */ 745 if ( !(mode & CDATA) ) 746 { 747 if ( c == '<') 748 { 749 AddString( pprint, "<" ); 750 return; 751 } 752 753 if ( c == '>') 754 { 755 AddString( pprint, ">" ); 756 return; 757 } 758 759 /* 760 naked '&' chars can be left alone or 761 quoted as & The latter is required 762 for XML where naked '&' are illegal. 763 */ 764/* Apple Changes: 765 2007-02-07 iccir When output encoding is RAW, the lexer is forced 766 to insert the entire entity instead of a converted 767 single character. 
Hence, &'s will appear in the 768 stream and must not be converted to & 769 770 See fix for [4642206] in lexer.c 771*/ 772#ifdef TIDY_APPLE_CHANGES 773 if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) && (cfg(doc, TidyOutCharEncoding) != RAW) ) 774#else 775 if ( c == '&' && cfgBool(doc, TidyQuoteAmpersand) ) 776#endif 777 { 778 AddString( pprint, "&" ); 779 return; 780 } 781 782 if ( c == '"' && qmark ) 783 { 784 AddString( pprint, """ ); 785 return; 786 } 787 788 if ( c == '\'' && qmark ) 789 { 790 AddString( pprint, "'" ); 791 return; 792 } 793 794 if ( c == 160 && outenc != RAW ) 795 { 796 if ( cfgBool(doc, TidyQuoteNbsp) ) 797 { 798 if ( cfgBool(doc, TidyNumEntities) || 799 cfgBool(doc, TidyXmlTags) ) 800 AddString( pprint, " " ); 801 else 802 AddString( pprint, " " ); 803 } 804 else 805 AddChar( pprint, c ); 806 return; 807 } 808 } 809 810#if SUPPORT_ASIAN_ENCODINGS 811 812 /* #431953 - start RJ */ 813 /* Handle encoding-specific issues */ 814 switch ( outenc ) 815 { 816 case UTF8: 817#if SUPPORT_UTF16_ENCODINGS 818 case UTF16: 819 case UTF16LE: 820 case UTF16BE: 821#endif 822 if (!(mode & PREFORMATTED) && cfg(doc, TidyPunctWrap)) 823 { 824 WrapPoint wp = CharacterWrapPoint(c); 825 if (wp == WrapBefore) 826 pprint->wraphere = pprint->linelen; 827 else if (wp == WrapAfter) 828 pprint->wraphere = pprint->linelen + 1; 829 } 830 break; 831 832 case BIG5: 833 /* Allow linebreak at Chinese punctuation characters */ 834 /* There are not many spaces in Chinese */ 835 AddChar( pprint, c ); 836 if (!(mode & PREFORMATTED) && cfg(doc, TidyPunctWrap)) 837 { 838 WrapPoint wp = Big5WrapPoint(c); 839 if (wp == WrapBefore) 840 pprint->wraphere = pprint->linelen; 841 else if (wp == WrapAfter) 842 pprint->wraphere = pprint->linelen + 1; 843 } 844 return; 845 846 case SHIFTJIS: 847#ifndef NO_NATIVE_ISO2022_SUPPORT 848 case ISO2022: /* ISO 2022 characters are passed raw */ 849#endif 850 case RAW: 851 AddChar( pprint, c ); 852 return; 853 } 854 /* #431953 - end RJ */ 855 856#else 
/* SUPPORT_ASIAN_ENCODINGS */ 857 858 /* otherwise ISO 2022 characters are passed raw */ 859 if ( 860#ifndef NO_NATIVE_ISO2022_SUPPORT 861 outenc == ISO2022 || 862#endif 863 outenc == RAW ) 864 { 865 AddChar( pprint, c ); 866 return; 867 } 868 869#endif /* SUPPORT_ASIAN_ENCODINGS */ 870 871 /* don't map latin-1 chars to entities */ 872 if ( outenc == LATIN1 ) 873 { 874 if (c > 255) /* multi byte chars */ 875 { 876 uint vers = TY_(HTMLVersion)( doc ); 877 if ( !cfgBool(doc, TidyNumEntities) && (p = TY_(EntityName)(c, vers)) ) 878 TY_(tmbsnprintf)(entity, sizeof(entity), "&%s;", p); 879 else 880 TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c); 881 882 AddString( pprint, entity ); 883 return; 884 } 885 886 if (c > 126 && c < 160) 887 { 888 TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c); 889 AddString( pprint, entity ); 890 return; 891 } 892 893 AddChar( pprint, c ); 894 return; 895 } 896 897 /* don't map UTF-8 chars to entities */ 898 if ( outenc == UTF8 ) 899 { 900 AddChar( pprint, c ); 901 return; 902 } 903 904#if SUPPORT_UTF16_ENCODINGS 905 /* don't map UTF-16 chars to entities */ 906 if ( outenc == UTF16 || outenc == UTF16LE || outenc == UTF16BE ) 907 { 908 AddChar( pprint, c ); 909 return; 910 } 911#endif 912 913 /* use numeric entities only for XML */ 914 if ( cfgBool(doc, TidyXmlTags) ) 915 { 916 /* if ASCII use numeric entities for chars > 127 */ 917 if ( c > 127 && outenc == ASCII ) 918 { 919 TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c); 920 AddString( pprint, entity ); 921 return; 922 } 923 924 /* otherwise output char raw */ 925 AddChar( pprint, c ); 926 return; 927 } 928 929 /* default treatment for ASCII */ 930 if ( outenc == ASCII && (c > 126 || (c < ' ' && c != '\t')) ) 931 { 932 uint vers = TY_(HTMLVersion)( doc ); 933 if (!cfgBool(doc, TidyNumEntities) && (p = TY_(EntityName)(c, vers)) ) 934 TY_(tmbsnprintf)(entity, sizeof(entity), "&%s;", p); 935 else 936 TY_(tmbsnprintf)(entity, sizeof(entity), "&#%u;", c); 937 938 AddString( 
pprint, entity ); 939 return; 940 } 941 942 AddChar( pprint, c ); 943} 944 945static uint IncrWS( uint start, uint end, uint indent, int ixWS ) 946{ 947 if ( ixWS > 0 ) 948 { 949 uint st = start + MIN( (uint)ixWS, indent ); 950 start = MIN( st, end ); 951 } 952 return start; 953} 954/* 955 The line buffer is uint not char so we can 956 hold Unicode values unencoded. The translation 957 to UTF-8 is deferred to the TY_(WriteChar)() routine called 958 to flush the line buffer. 959*/ 960static void PPrintText( TidyDocImpl* doc, uint mode, uint indent, 961 Node* node ) 962{ 963 uint start = node->start; 964 uint end = node->end; 965 uint ix, c = 0; 966 int ixNL = TextEndsWithNewline( doc->lexer, node, mode ); 967 int ixWS = TextStartsWithWhitespace( doc->lexer, node, start, mode ); 968 if ( ixNL > 0 ) 969 end -= ixNL; 970 start = IncrWS( start, end, indent, ixWS ); 971 972 for ( ix = start; ix < end; ++ix ) 973 { 974 CheckWrapIndent( doc, indent ); 975 /* 976 if ( CheckWrapIndent(doc, indent) ) 977 { 978 ixWS = TextStartsWithWhitespace( doc->lexer, node, ix ); 979 ix = IncrWS( ix, end, indent, ixWS ); 980 } 981 */ 982 c = (byte) doc->lexer->lexbuf[ix]; 983 984 /* look for UTF-8 multibyte character */ 985 if ( c > 0x7F ) 986 ix += TY_(GetUTF8)( doc->lexer->lexbuf + ix, &c ); 987 988 if ( c == '\n' ) 989 { 990 TY_(PFlushLine)( doc, indent ); 991 ixWS = TextStartsWithWhitespace( doc->lexer, node, ix+1, mode ); 992 ix = IncrWS( ix, end, indent, ixWS ); 993 } 994 else 995 { 996 PPrintChar( doc, c, mode ); 997 } 998 } 999} 1000 1001#if 0 1002static void PPrintString( TidyDocImpl* doc, uint indent, ctmbstr str ) 1003{ 1004 while ( *str != '\0' ) 1005 AddChar( &doc->pprint, *str++ ); 1006} 1007#endif /* 0 */ 1008 1009 1010static void PPrintAttrValue( TidyDocImpl* doc, uint indent, 1011 ctmbstr value, uint delim, Bool wrappable, Bool scriptAttr ) 1012{ 1013 TidyPrintImpl* pprint = &doc->pprint; 1014 Bool scriptlets = cfgBool(doc, TidyWrapScriptlets); 1015 1016 uint mode = 
PREFORMATTED | ATTRIBVALUE; 1017 if ( wrappable ) 1018 mode = NORMAL | ATTRIBVALUE; 1019 1020 /* look for ASP, Tango or PHP instructions for computed attribute value */ 1021 if ( value && value[0] == '<' ) 1022 { 1023 if ( value[1] == '%' || value[1] == '@'|| 1024 TY_(tmbstrncmp)(value, "<?php", 5) == 0 ) 1025 mode |= CDATA; 1026 } 1027 1028 if ( delim == 0 ) 1029 delim = '"'; 1030 1031 AddChar( pprint, '=' ); 1032 1033 /* don't wrap after "=" for xml documents */ 1034 if ( !cfgBool(doc, TidyXmlOut) || cfgBool(doc, TidyXhtmlOut) ) 1035 { 1036 SetWrap( doc, indent ); 1037 CheckWrapIndent( doc, indent ); 1038 /* 1039 if ( !SetWrap(doc, indent) ) 1040 PCondFlushLine( doc, indent ); 1041 */ 1042 } 1043 1044 AddChar( pprint, delim ); 1045 1046 if ( value ) 1047 { 1048 uint wraplen = cfg( doc, TidyWrapLen ); 1049 int attrStart = SetInAttrVal( pprint ); 1050 int strStart = ClearInString( pprint ); 1051 1052 while (*value != '\0') 1053 { 1054 uint c = *value; 1055 1056 if ( wrappable && c == ' ' ) 1057 SetWrapAttr( doc, indent, attrStart, strStart ); 1058 1059 if ( wrappable && pprint->wraphere > 0 && 1060 GetSpaces(pprint) + pprint->linelen >= wraplen ) 1061 WrapAttrVal( doc ); 1062 1063 if ( c == delim ) 1064 { 1065 ctmbstr entity = (c == '"' ? 
""" : "'"); 1066 AddString( pprint, entity ); 1067 ++value; 1068 continue; 1069 } 1070 else if (c == '"') 1071 { 1072 if ( cfgBool(doc, TidyQuoteMarks) ) 1073 AddString( pprint, """ ); 1074 else 1075 AddChar( pprint, c ); 1076 1077 if ( delim == '\'' && scriptAttr && scriptlets ) 1078 strStart = ToggleInString( pprint ); 1079 1080 ++value; 1081 continue; 1082 } 1083 else if ( c == '\'' ) 1084 { 1085 if ( cfgBool(doc, TidyQuoteMarks) ) 1086 AddString( pprint, "'" ); 1087 else 1088 AddChar( pprint, c ); 1089 1090 if ( delim == '"' && scriptAttr && scriptlets ) 1091 strStart = ToggleInString( pprint ); 1092 1093 ++value; 1094 continue; 1095 } 1096 1097 /* look for UTF-8 multibyte character */ 1098 if ( c > 0x7F ) 1099 value += TY_(GetUTF8)( value, &c ); 1100 ++value; 1101 1102 if ( c == '\n' ) 1103 { 1104 /* No indent inside Javascript literals */ 1105 TY_(PFlushLine)( doc, (strStart < 0 1106 && !cfgBool(doc, TidyLiteralAttribs) ? 1107 indent : 0) ); 1108 continue; 1109 } 1110 PPrintChar( doc, c, mode ); 1111 } 1112 ClearInAttrVal( pprint ); 1113 ClearInString( pprint ); 1114 } 1115 AddChar( pprint, delim ); 1116} 1117 1118static uint AttrIndent( TidyDocImpl* doc, Node* node, AttVal* ARG_UNUSED(attr) ) 1119{ 1120 uint spaces = cfg( doc, TidyIndentSpaces ); 1121 uint xtra = 2; /* 1 for the '<', another for the ' ' */ 1122 if ( node->element == NULL ) 1123 return spaces; 1124 1125 if ( !TY_(nodeHasCM)(node, CM_INLINE) || 1126 !ShouldIndent(doc, node->parent ? 
node->parent: node) ) 1127 return xtra + TY_(tmbstrlen)( node->element ); 1128 1129 if ( NULL != (node = TY_(FindContainer)(node)) ) 1130 return xtra + TY_(tmbstrlen)( node->element ); 1131 return spaces; 1132} 1133 1134static Bool AttrNoIndentFirst( /*TidyDocImpl* doc,*/ Node* node, AttVal* attr ) 1135{ 1136 return ( attr==node->attributes ); 1137 1138 /*&& 1139 ( InsideHead(doc, node) || 1140 !TY_(nodeHasCM)(node, CM_INLINE) ) ); 1141 */ 1142} 1143 1144static void PPrintAttribute( TidyDocImpl* doc, uint indent, 1145 Node *node, AttVal *attr ) 1146{ 1147 TidyPrintImpl* pprint = &doc->pprint; 1148 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1149 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1150 Bool wrapAttrs = cfgBool( doc, TidyWrapAttVals ); 1151 Bool ucAttrs = cfgBool( doc, TidyUpperCaseAttrs ); 1152 Bool indAttrs = cfgBool( doc, TidyIndentAttributes ); 1153 uint xtra = AttrIndent( doc, node, attr ); 1154 Bool first = AttrNoIndentFirst( /*doc,*/ node, attr ); 1155 tmbstr name = attr->attribute; 1156 Bool wrappable = no; 1157 tchar c; 1158 1159 /* fix for odd attribute indentation bug triggered by long values */ 1160 if (!indAttrs) 1161 xtra = 0; 1162 1163 if ( indAttrs ) 1164 { 1165 if ( TY_(nodeIsElement)(node) && !first ) 1166 { 1167 indent += xtra; 1168 PCondFlushLine( doc, indent ); 1169 } 1170 else 1171 indAttrs = no; 1172 } 1173 1174 CheckWrapIndent( doc, indent ); 1175 1176 if ( !xmlOut && !xhtmlOut && attr->dict ) 1177 { 1178 if ( TY_(IsScript)(doc, name) ) 1179 wrappable = cfgBool( doc, TidyWrapScriptlets ); 1180 else if (!(attrIsCONTENT(attr) || attrIsVALUE(attr) || attrIsALT(attr)) && wrapAttrs ) 1181 wrappable = yes; 1182 } 1183 1184 if ( !first && !SetWrap(doc, indent) ) 1185 { 1186 TY_(PFlushLine)( doc, indent+xtra ); /* Put it on next line */ 1187 } 1188 else if ( pprint->linelen > 0 ) 1189 { 1190 AddChar( pprint, ' ' ); 1191 } 1192 1193 /* Attribute name */ 1194 while (*name) 1195 { 1196 c = (unsigned char)*name; 1197 1198 if (c > 0x7F) 1199 name 
+= TY_(GetUTF8)(name, &c); 1200 else if (ucAttrs) 1201 c = TY_(ToUpper)(c); 1202 1203 AddChar(pprint, c); 1204 ++name; 1205 } 1206 1207/* fix for bug 732038 */ 1208#if 0 1209 /* If not indenting attributes, bump up indent for 1210 ** value after putting out name. 1211 */ 1212 if ( !indAttrs ) 1213 indent += xtra; 1214#endif 1215 1216 CheckWrapIndent( doc, indent ); 1217 1218 if ( attr->value == NULL ) 1219 { 1220 Bool isB = TY_(IsBoolAttribute)(attr); 1221 Bool scriptAttr = TY_(attrIsEvent)(attr); 1222 1223 if ( xmlOut ) 1224 PPrintAttrValue( doc, indent, isB ? attr->attribute : NULLSTR, 1225 attr->delim, no, scriptAttr ); 1226 1227 else if ( !isB && !TY_(IsNewNode)(node) ) 1228 PPrintAttrValue( doc, indent, "", attr->delim, yes, scriptAttr ); 1229 1230 else 1231 SetWrap( doc, indent ); 1232 } 1233 else 1234 PPrintAttrValue( doc, indent, attr->value, attr->delim, wrappable, no ); 1235} 1236 1237static void PPrintAttrs( TidyDocImpl* doc, uint indent, Node *node ) 1238{ 1239 TidyPrintImpl* pprint = &doc->pprint; 1240 AttVal* av; 1241 1242 /* add xml:space attribute to pre and other elements */ 1243 if ( cfgBool(doc, TidyXmlOut) && cfgBool(doc, TidyXmlSpace) && 1244 !TY_(GetAttrByName)(node, "xml:space") && 1245 TY_(XMLPreserveWhiteSpace)(doc, node) ) 1246 { 1247 TY_(AddAttribute)( doc, node, "xml:space", "preserve" ); 1248 } 1249 1250 for ( av = node->attributes; av; av = av->next ) 1251 { 1252 if ( av->attribute != NULL ) 1253 { 1254 PPrintAttribute( doc, indent, node, av ); 1255 } 1256 else if ( av->asp != NULL ) 1257 { 1258 AddChar( pprint, ' ' ); 1259 PPrintAsp( doc, indent, av->asp ); 1260 } 1261 else if ( av->php != NULL ) 1262 { 1263 AddChar( pprint, ' ' ); 1264 PPrintPhp( doc, indent, av->php ); 1265 } 1266 } 1267} 1268 1269/* 1270 Line can be wrapped immediately after inline start tag provided 1271 if follows a text node ending in a space, or it follows a <br>, 1272 or its parent is an inline element that that rule applies to. 
1273 This behaviour was reverse engineered from Netscape 3.0. 1274 1275 Line wrapping can occur if an element is not empty and before a block 1276 level. For instance: 1277 <p><span> 1278 x</span>y</p> 1279 will display properly. Whereas 1280 <p><img /> 1281 x<</p> won't. 1282*/ 1283static Bool AfterSpaceImp(Lexer *lexer, Node *node, Bool isEmpty) 1284{ 1285 Node *prev; 1286 1287 if ( !TY_(nodeCMIsInline)(node) ) 1288 return yes; 1289 1290 prev = node->prev; 1291 if (prev) 1292 { 1293 if (TY_(nodeIsText)(prev) && prev->end > prev->start) 1294 { 1295 uint i, c = '\0'; /* initialised to avoid warnings */ 1296 for (i = prev->start; i < prev->end; ++i) 1297 { 1298 c = (byte) lexer->lexbuf[i]; 1299 if ( c > 0x7F ) 1300 i += TY_(GetUTF8)( lexer->lexbuf + i, &c ); 1301 } 1302 1303 if ( c == ' ' || c == '\n' ) 1304 return yes; 1305 } 1306 else if (nodeIsBR(prev)) 1307 return yes; 1308 1309 return no; 1310 } 1311 1312 if ( isEmpty && !TY_(nodeCMIsInline)(node->parent) ) 1313 return no; 1314 1315 return AfterSpaceImp(lexer, node->parent, isEmpty); 1316} 1317 1318static Bool AfterSpace(Lexer *lexer, Node *node) 1319{ 1320 return AfterSpaceImp(lexer, node, TY_(nodeCMIsEmpty)(node)); 1321} 1322 1323static void PPrintTag( TidyDocImpl* doc, 1324 uint mode, uint indent, Node *node ) 1325{ 1326 TidyPrintImpl* pprint = &doc->pprint; 1327 Bool uc = cfgBool( doc, TidyUpperCaseTags ); 1328 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1329 Bool xmlOut = cfgBool( doc, TidyXmlOut ); 1330 tchar c; 1331 tmbstr s = node->element; 1332 1333 AddChar( pprint, '<' ); 1334 1335 if ( node->type == EndTag ) 1336 AddChar( pprint, '/' ); 1337 1338 if (s) 1339 { 1340 while (*s) 1341 { 1342 c = (unsigned char)*s; 1343 1344 if (c > 0x7F) 1345 s += TY_(GetUTF8)(s, &c); 1346 else if (uc) 1347 c = TY_(ToUpper)(c); 1348 1349 AddChar(pprint, c); 1350 ++s; 1351 } 1352 } 1353 1354 PPrintAttrs( doc, indent, node ); 1355 1356 if ( (xmlOut || xhtmlOut) && 1357 (node->type == StartEndTag || 
TY_(nodeCMIsEmpty)(node)) ) 1358 { 1359 AddChar( pprint, ' ' ); /* Space is NS compatibility hack <br /> */ 1360 AddChar( pprint, '/' ); /* Required end tag marker */ 1361 } 1362 1363 AddChar( pprint, '>' ); 1364 1365 if ( (node->type != StartEndTag || xhtmlOut) && !(mode & PREFORMATTED) ) 1366 { 1367 uint wraplen = cfg( doc, TidyWrapLen ); 1368 CheckWrapIndent( doc, indent ); 1369 1370 if ( indent + pprint->linelen < wraplen ) 1371 { 1372 /* wrap after start tag if is <br/> or if it's not inline. 1373 Technically, it would be safe to call only AfterSpace. 1374 However, it would disrupt the existing algorithm. So let's 1375 leave as is. Note that AfterSpace returns true for non inline 1376 elements but can still be false for some <br>. So it has to 1377 stay as well. */ 1378 if (!(mode & NOWRAP) && (!TY_(nodeCMIsInline)(node) || nodeIsBR(node)) 1379 && AfterSpace(doc->lexer, node)) 1380 { 1381 pprint->wraphere = pprint->linelen; 1382 } 1383 } 1384 /* flush the current buffer only if it is known to be safe, 1385 i.e. it will not introduce some spurious white spaces. 1386 See bug #996484 */ 1387 else if ( mode & NOWRAP || 1388 nodeIsBR(node) || AfterSpace(doc->lexer, node)) 1389 PCondFlushLine( doc, indent ); 1390 } 1391} 1392 1393static void PPrintEndTag( TidyDocImpl* doc, uint ARG_UNUSED(mode), 1394 uint ARG_UNUSED(indent), Node *node ) 1395{ 1396 TidyPrintImpl* pprint = &doc->pprint; 1397 Bool uc = cfgBool( doc, TidyUpperCaseTags ); 1398 tmbstr s = node->element; 1399 tchar c; 1400 1401 /* 1402 Netscape ignores SGML standard by not ignoring a 1403 line break before </A> or </U> etc. To avoid rendering 1404 this as an underlined space, I disable line wrapping 1405 before inline end tags by the #if 0 ... 
#endif 1406 */ 1407#if 0 1408 if ( !(mode & NOWRAP) ) 1409 SetWrap( doc, indent ); 1410#endif 1411 1412 AddString( pprint, "</" ); 1413 1414 if (s) 1415 { 1416 while (*s) 1417 { 1418 c = (unsigned char)*s; 1419 1420 if (c > 0x7F) 1421 s += TY_(GetUTF8)(s, &c); 1422 else if (uc) 1423 c = TY_(ToUpper)(c); 1424 1425 AddChar(pprint, c); 1426 ++s; 1427 } 1428 } 1429 1430 AddChar( pprint, '>' ); 1431} 1432 1433static void PPrintComment( TidyDocImpl* doc, uint indent, Node* node ) 1434{ 1435 TidyPrintImpl* pprint = &doc->pprint; 1436 1437 SetWrap( doc, indent ); 1438 AddString( pprint, "<!--" ); 1439 1440#if 0 1441 SetWrap( doc, indent ); 1442#endif 1443 1444 PPrintText(doc, COMMENT, 0, node); 1445 1446#if 0 1447 SetWrap( doc, indent ); 1448 AddString( pprint, "--" ); 1449#endif 1450 1451 AddString(pprint, "--"); 1452 AddChar( pprint, '>' ); 1453 if ( node->linebreak && node->next ) 1454 TY_(PFlushLine)( doc, indent ); 1455} 1456 1457static void PPrintDocType( TidyDocImpl* doc, uint indent, Node *node ) 1458{ 1459 TidyPrintImpl* pprint = &doc->pprint; 1460 uint wraplen = cfg( doc, TidyWrapLen ); 1461 uint spaces = cfg( doc, TidyIndentSpaces ); 1462 AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC"); 1463 AttVal* sys = TY_(GetAttrByName)(node, "SYSTEM"); 1464 1465 /* todo: handle non-ASCII characters in FPI / SI / node->element */ 1466 1467 SetWrap( doc, indent ); 1468 PCondFlushLine( doc, indent ); 1469 1470 AddString( pprint, "<!DOCTYPE " ); 1471 SetWrap( doc, indent ); 1472 if (node->element) 1473 { 1474 AddString(pprint, node->element); 1475 } 1476 1477 if (fpi && fpi->value) 1478 { 1479 AddString(pprint, " PUBLIC "); 1480 AddChar(pprint, fpi->delim); 1481 AddString(pprint, fpi->value); 1482 AddChar(pprint, fpi->delim); 1483 } 1484 1485 if (fpi && fpi->value && sys && sys->value) 1486 { 1487 uint i = pprint->linelen - (TY_(tmbstrlen)(sys->value) + 2) - 1; 1488 if (!(i>0&&TY_(tmbstrlen)(sys->value)+2+i<wraplen&&i<=(spaces?spaces:2)*2)) 1489 i = 0; 1490 1491 
PCondFlushLine(doc, i); 1492 if (pprint->linelen) 1493 AddChar(pprint, ' '); 1494 } 1495 else if (sys && sys->value) 1496 { 1497 AddString(pprint, " SYSTEM "); 1498 } 1499 1500 if (sys && sys->value) 1501 { 1502 AddChar(pprint, sys->delim); 1503 AddString(pprint, sys->value); 1504 AddChar(pprint, sys->delim); 1505 } 1506 1507 if (node->content) 1508 { 1509 PCondFlushLine(doc, indent); 1510 AddChar(pprint, '['); 1511 PPrintText(doc, CDATA, 0, node->content); 1512 AddChar(pprint, ']'); 1513 } 1514 1515 SetWrap( doc, 0 ); 1516 AddChar( pprint, '>' ); 1517 PCondFlushLine( doc, indent ); 1518} 1519 1520static void PPrintPI( TidyDocImpl* doc, uint indent, Node *node ) 1521{ 1522 TidyPrintImpl* pprint = &doc->pprint; 1523 tchar c; 1524 tmbstr s; 1525 1526 SetWrap( doc, indent ); 1527 AddString( pprint, "<?" ); 1528 1529 s = node->element; 1530 1531 while (s && *s) 1532 { 1533 c = (unsigned char)*s; 1534 if (c > 0x7F) 1535 s += TY_(GetUTF8)(s, &c); 1536 AddChar(pprint, c); 1537 ++s; 1538 } 1539 1540 /* set CDATA to pass < and > unescaped */ 1541 PPrintText( doc, CDATA, indent, node ); 1542 1543 if (cfgBool(doc, TidyXmlOut) || 1544 cfgBool(doc, TidyXhtmlOut) || node->closed) 1545 AddChar( pprint, '?' 
); 1546 1547 AddChar( pprint, '>' ); 1548 PCondFlushLine( doc, indent ); 1549} 1550 1551static void PPrintXmlDecl( TidyDocImpl* doc, uint indent, Node *node ) 1552{ 1553 AttVal* att; 1554 uint saveWrap; 1555 TidyPrintImpl* pprint = &doc->pprint; 1556 Bool ucAttrs; 1557 SetWrap( doc, indent ); 1558 saveWrap = WrapOff( doc ); 1559 1560 /* no case translation for XML declaration pseudo attributes */ 1561 ucAttrs = cfgBool(doc, TidyUpperCaseAttrs); 1562 TY_(SetOptionBool)(doc, TidyUpperCaseAttrs, no); 1563 1564 AddString( pprint, "<?xml" ); 1565 1566 /* Force order of XML declaration attributes */ 1567 /* PPrintAttrs( doc, indent, node ); */ 1568 if ( NULL != (att = TY_(AttrGetById)(node, TidyAttr_VERSION)) ) 1569 PPrintAttribute( doc, indent, node, att ); 1570 if ( NULL != (att = TY_(AttrGetById)(node, TidyAttr_ENCODING)) ) 1571 PPrintAttribute( doc, indent, node, att ); 1572 if ( NULL != (att = TY_(GetAttrByName)(node, "standalone")) ) 1573 PPrintAttribute( doc, indent, node, att ); 1574 1575 /* restore old config value */ 1576 TY_(SetOptionBool)(doc, TidyUpperCaseAttrs, ucAttrs); 1577 1578 if ( node->end <= 0 || doc->lexer->lexbuf[node->end - 1] != '?' ) 1579 AddChar( pprint, '?' ); 1580 AddChar( pprint, '>' ); 1581 WrapOn( doc, saveWrap ); 1582 TY_(PFlushLine)( doc, indent ); 1583} 1584 1585/* note ASP and JSTE share <% ... %> syntax */ 1586static void PPrintAsp( TidyDocImpl* doc, uint indent, Node *node ) 1587{ 1588 TidyPrintImpl* pprint = &doc->pprint; 1589 Bool wrapAsp = cfgBool( doc, TidyWrapAsp ); 1590 Bool wrapJste = cfgBool( doc, TidyWrapJste ); 1591 uint saveWrap = WrapOffCond( doc, !wrapAsp || !wrapJste ); 1592 1593#if 0 1594 SetWrap( doc, indent ); 1595#endif 1596 AddString( pprint, "<%" ); 1597 PPrintText( doc, (wrapAsp ? CDATA : COMMENT), indent, node ); 1598 AddString( pprint, "%>" ); 1599 1600 /* PCondFlushLine( doc, indent ); */ 1601 WrapOn( doc, saveWrap ); 1602} 1603 1604/* JSTE also supports <# ... 
#> syntax */ 1605static void PPrintJste( TidyDocImpl* doc, uint indent, Node *node ) 1606{ 1607 TidyPrintImpl* pprint = &doc->pprint; 1608 Bool wrapAsp = cfgBool( doc, TidyWrapAsp ); 1609 uint saveWrap = WrapOffCond( doc, !wrapAsp ); 1610 1611 AddString( pprint, "<#" ); 1612 PPrintText( doc, (cfgBool(doc, TidyWrapJste) ? CDATA : COMMENT), 1613 indent, node ); 1614 AddString( pprint, "#>" ); 1615 1616 /* PCondFlushLine( doc, indent ); */ 1617 WrapOn( doc, saveWrap ); 1618} 1619 1620/* PHP is based on XML processing instructions */ 1621static void PPrintPhp( TidyDocImpl* doc, uint indent, Node *node ) 1622{ 1623 TidyPrintImpl* pprint = &doc->pprint; 1624 Bool wrapPhp = cfgBool( doc, TidyWrapPhp ); 1625 uint saveWrap = WrapOffCond( doc, !wrapPhp ); 1626#if 0 1627 SetWrap( doc, indent ); 1628#endif 1629 1630 AddString( pprint, "<?" ); 1631 PPrintText( doc, (wrapPhp ? CDATA : COMMENT), 1632 indent, node ); 1633 AddString( pprint, "?>" ); 1634 1635 /* PCondFlushLine( doc, indent ); */ 1636 WrapOn( doc, saveWrap ); 1637} 1638 1639static void PPrintCDATA( TidyDocImpl* doc, uint indent, Node *node ) 1640{ 1641 uint saveWrap; 1642 TidyPrintImpl* pprint = &doc->pprint; 1643 Bool indentCData = cfgBool( doc, TidyIndentCdata ); 1644 if ( !indentCData ) 1645 indent = 0; 1646 1647 PCondFlushLine( doc, indent ); 1648 saveWrap = WrapOff( doc ); /* disable wrapping */ 1649 1650 AddString( pprint, "<![CDATA[" ); 1651 PPrintText( doc, COMMENT, indent, node ); 1652 AddString( pprint, "]]>" ); 1653 1654 PCondFlushLine( doc, indent ); 1655 WrapOn( doc, saveWrap ); /* restore wrapping */ 1656} 1657 1658static void PPrintSection( TidyDocImpl* doc, uint indent, Node *node ) 1659{ 1660 TidyPrintImpl* pprint = &doc->pprint; 1661 Bool wrapSect = cfgBool( doc, TidyWrapSection ); 1662 uint saveWrap = WrapOffCond( doc, !wrapSect ); 1663#if 0 1664 SetWrap( doc, indent ); 1665#endif 1666 1667 AddString( pprint, "<![" ); 1668 PPrintText( doc, (wrapSect ? 
CDATA : COMMENT), 1669 indent, node ); 1670 AddString( pprint, "]>" ); 1671 1672 /* PCondFlushLine( doc, indent ); */ 1673 WrapOn( doc, saveWrap ); 1674} 1675 1676 1677#if 0 1678/* 1679** Print script and style elements. For XHTML, wrap the content as follows: 1680** 1681** JavaScript: 1682** //<![CDATA[ 1683** content 1684** //]]> 1685** VBScript: 1686** '<![CDATA[ 1687** content 1688** ']]> 1689** CSS: 1690** / *<![CDATA[* / Extra spaces to keep compiler happy 1691** content 1692** / *]]>* / 1693** other: 1694** <![CDATA[ 1695** content 1696** ]]> 1697*/ 1698#endif 1699 1700static ctmbstr CDATA_START = "<![CDATA["; 1701static ctmbstr CDATA_END = "]]>"; 1702static ctmbstr JS_COMMENT_START = "//"; 1703static ctmbstr JS_COMMENT_END = ""; 1704static ctmbstr VB_COMMENT_START = "\'"; 1705static ctmbstr VB_COMMENT_END = ""; 1706static ctmbstr CSS_COMMENT_START = "/*"; 1707static ctmbstr CSS_COMMENT_END = "*/"; 1708static ctmbstr DEFAULT_COMMENT_START = ""; 1709static ctmbstr DEFAULT_COMMENT_END = ""; 1710 1711static Bool InsideHead( TidyDocImpl* doc, Node *node ) 1712{ 1713 if ( nodeIsHEAD(node) ) 1714 return yes; 1715 1716 if ( node->parent != NULL ) 1717 return InsideHead( doc, node->parent ); 1718 1719 return no; 1720} 1721 1722/* Is text node and already ends w/ a newline? 1723 1724 Used to pretty print CDATA/PRE text content. 1725 If it already ends on a newline, it is not 1726 necessary to print another before printing end tag. 1727*/ 1728static int TextEndsWithNewline(Lexer *lexer, Node *node, uint mode ) 1729{ 1730 if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start ) 1731 { 1732 uint ch, ix = node->end - 1; 1733 /* Skip non-newline whitespace. 
*/ 1734 while ( ix >= node->start && (ch = (lexer->lexbuf[ix] & 0xff)) 1735 && ( ch == ' ' || ch == '\t' || ch == '\r' ) ) 1736 --ix; 1737 1738 if ( lexer->lexbuf[ ix ] == '\n' ) 1739 return node->end - ix - 1; /* #543262 tidy eats all memory */ 1740 } 1741 return -1; 1742} 1743 1744static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode ) 1745{ 1746 assert( node != NULL ); 1747 if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start && start >= node->start ) 1748 { 1749 uint ch, ix = start; 1750 /* Skip whitespace. */ 1751 while ( ix < node->end && (ch = (lexer->lexbuf[ix] & 0xff)) 1752 && ( ch==' ' || ch=='\t' || ch=='\r' ) ) 1753 ++ix; 1754 1755 if ( ix > start ) 1756 return ix - start; 1757 } 1758 return -1; 1759} 1760 1761static Bool HasCDATA( Lexer* lexer, Node* node ) 1762{ 1763 /* Scan forward through the textarray. Since the characters we're 1764 ** looking for are < 0x7f, we don't have to do any UTF-8 decoding. 1765 */ 1766 ctmbstr start = lexer->lexbuf + node->start; 1767 int len = node->end - node->start + 1; 1768 1769 if ( node->type != TextNode ) 1770 return no; 1771 1772 return ( NULL != TY_(tmbsubstrn)( start, len, CDATA_START )); 1773} 1774 1775 1776static 1777void PPrintScriptStyle( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 1778{ 1779 TidyPrintImpl* pprint = &doc->pprint; 1780 Node* content; 1781 ctmbstr commentStart = DEFAULT_COMMENT_START; 1782 ctmbstr commentEnd = DEFAULT_COMMENT_END; 1783 Bool hasCData = no; 1784 int contentIndent = -1; 1785 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 1786 1787 if ( InsideHead(doc, node) ) 1788 TY_(PFlushLine)( doc, indent ); 1789 1790 PPrintTag( doc, mode, indent, node ); 1791 1792 /* use zero indent here, see http://tidy.sf.net/bug/729972 */ 1793 TY_(PFlushLine)(doc, 0); 1794 1795 if ( xhtmlOut && node->content != NULL ) 1796 { 1797 AttVal* type = attrGetTYPE(node); 1798 1799 if (AttrValueIs(type, "text/javascript")) 1800 { 1801 
commentStart = JS_COMMENT_START; 1802 commentEnd = JS_COMMENT_END; 1803 } 1804 else if (AttrValueIs(type, "text/css")) 1805 { 1806 commentStart = CSS_COMMENT_START; 1807 commentEnd = CSS_COMMENT_END; 1808 } 1809 else if (AttrValueIs(type, "text/vbscript")) 1810 { 1811 commentStart = VB_COMMENT_START; 1812 commentEnd = VB_COMMENT_END; 1813 } 1814 1815 hasCData = HasCDATA(doc->lexer, node->content); 1816 1817 if (!hasCData) 1818 { 1819 uint saveWrap = WrapOff( doc ); 1820 1821 AddString( pprint, commentStart ); 1822 AddString( pprint, CDATA_START ); 1823 AddString( pprint, commentEnd ); 1824 PCondFlushLine( doc, indent ); 1825 1826 WrapOn( doc, saveWrap ); 1827 } 1828 } 1829 1830 for ( content = node->content; 1831 content != NULL; 1832 content = content->next ) 1833 { 1834 /* 1835 This is a bit odd, with the current code there can only 1836 be one child and the only caller of this function defines 1837 all these modes already... 1838 */ 1839 TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP | CDATA), 1840 indent, content ); 1841 1842 if ( content == node->last ) 1843 contentIndent = TextEndsWithNewline( doc->lexer, content, CDATA ); 1844 } 1845 1846 if ( contentIndent < 0 ) 1847 { 1848 PCondFlushLine( doc, indent ); 1849 contentIndent = 0; 1850 } 1851 1852 if ( xhtmlOut && node->content != NULL ) 1853 { 1854 if ( ! 
hasCData ) 1855 { 1856 uint saveWrap = WrapOff( doc ); 1857 1858 AddString( pprint, commentStart ); 1859 AddString( pprint, CDATA_END ); 1860 AddString( pprint, commentEnd ); 1861 1862 WrapOn( doc, saveWrap ); 1863 PCondFlushLine( doc, indent ); 1864 } 1865 } 1866 1867 if ( node->content && pprint->indent[ 0 ].spaces != (int)indent ) 1868 { 1869 pprint->indent[ 0 ].spaces = indent; 1870 } 1871 PPrintEndTag( doc, mode, indent, node ); 1872 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState 1873 && node->next != NULL && 1874 !( TY_(nodeHasCM)(node, CM_INLINE) || TY_(nodeIsText)(node) ) ) 1875 TY_(PFlushLine)( doc, indent ); 1876} 1877 1878 1879 1880static Bool ShouldIndent( TidyDocImpl* doc, Node *node ) 1881{ 1882 TidyTriState indentContent = cfgAutoBool( doc, TidyIndentContent ); 1883 if ( indentContent == TidyNoState ) 1884 return no; 1885 1886 if ( nodeIsTEXTAREA(node) ) 1887 return no; 1888 1889 if ( indentContent == TidyAutoState ) 1890 { 1891 if ( node->content && TY_(nodeHasCM)(node, CM_NO_INDENT) ) 1892 { 1893 for ( node = node->content; node; node = node->next ) 1894 if ( TY_(nodeHasCM)(node, CM_BLOCK) ) 1895 return yes; 1896 return no; 1897 } 1898 1899 if ( TY_(nodeHasCM)(node, CM_HEADING) ) 1900 return no; 1901 1902 if ( nodeIsHTML(node) ) 1903 return no; 1904 1905 if ( nodeIsP(node) ) 1906 return no; 1907 1908 if ( nodeIsTITLE(node) ) 1909 return no; 1910 } 1911 1912 if ( TY_(nodeHasCM)(node, CM_FIELD | CM_OBJECT) ) 1913 return yes; 1914 1915 if ( nodeIsMAP(node) ) 1916 return yes; 1917 1918 return ( !TY_(nodeHasCM)( node, CM_INLINE ) && node->content ); 1919} 1920 1921/* 1922 Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 1923 print just the content of the body element. 1924 useful when you want to reuse material from 1925 other documents. 
1926 1927 -- Sebastiano Vigna <vigna@dsi.unimi.it> 1928*/ 1929void TY_(PrintBody)( TidyDocImpl* doc ) 1930{ 1931 Node *node = TY_(FindBody)( doc ); 1932 1933 if ( node ) 1934 { 1935 for ( node = node->content; node != NULL; node = node->next ) 1936 TY_(PPrintTree)( doc, NORMAL, 0, node ); 1937 } 1938} 1939 1940void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 1941{ 1942 Node *content, *last; 1943 uint spaces = cfg( doc, TidyIndentSpaces ); 1944 Bool xhtml = cfgBool( doc, TidyXhtmlOut ); 1945 1946 if ( node == NULL ) 1947 return; 1948 1949 if (node->type == TextNode) 1950 { 1951 PPrintText( doc, mode, indent, node ); 1952 } 1953 else if ( node->type == CommentTag ) 1954 { 1955 PPrintComment( doc, indent, node ); 1956 } 1957 else if ( node->type == RootNode ) 1958 { 1959 for ( content = node->content; content; content = content->next ) 1960 TY_(PPrintTree)( doc, mode, indent, content ); 1961 } 1962 else if ( node->type == DocTypeTag ) 1963 PPrintDocType( doc, indent, node ); 1964 else if ( node->type == ProcInsTag) 1965 PPrintPI( doc, indent, node ); 1966 else if ( node->type == XmlDecl) 1967 PPrintXmlDecl( doc, indent, node ); 1968 else if ( node->type == CDATATag) 1969 PPrintCDATA( doc, indent, node ); 1970 else if ( node->type == SectionTag) 1971 PPrintSection( doc, indent, node ); 1972 else if ( node->type == AspTag) 1973 PPrintAsp( doc, indent, node ); 1974 else if ( node->type == JsteTag) 1975 PPrintJste( doc, indent, node ); 1976 else if ( node->type == PhpTag) 1977 PPrintPhp( doc, indent, node ); 1978 else if ( TY_(nodeCMIsEmpty)(node) || 1979 (node->type == StartEndTag && !xhtml) ) 1980 { 1981 if ( ! 
TY_(nodeHasCM)(node, CM_INLINE) ) 1982 PCondFlushLine( doc, indent ); 1983 1984 if ( nodeIsBR(node) && node->prev && 1985 !(nodeIsBR(node->prev) || (mode & PREFORMATTED)) && 1986 cfgBool(doc, TidyBreakBeforeBR) ) 1987 TY_(PFlushLine)( doc, indent ); 1988 1989 if ( nodeIsHR(node) ) 1990 { 1991 /* insert extra newline for classic formatting */ 1992 Bool classic = cfgBool( doc, TidyVertSpace ); 1993 if (classic && node->parent && node->parent->content != node) 1994 { 1995 TY_(PFlushLine)( doc, indent ); 1996 } 1997 } 1998 1999 PPrintTag( doc, mode, indent, node ); 2000 2001 if (node->next) 2002 { 2003 if (nodeIsPARAM(node) || nodeIsAREA(node)) 2004 PCondFlushLine(doc, indent); 2005 else if ((nodeIsBR(node) && !(mode & PREFORMATTED)) 2006 || nodeIsHR(node)) 2007 TY_(PFlushLine)(doc, indent); 2008 } 2009 } 2010 else /* some kind of container element */ 2011 { 2012 if ( node->type == StartEndTag ) 2013 node->type = StartTag; 2014 2015 if ( node->tag && 2016 (node->tag->parser == TY_(ParsePre) || nodeIsTEXTAREA(node)) ) 2017 { 2018 Bool classic = cfgBool( doc, TidyVertSpace ); 2019 uint indprev = indent; 2020 PCondFlushLine( doc, indent ); 2021 2022 PCondFlushLine( doc, indent ); 2023 2024 /* insert extra newline for classic formatting */ 2025 if (classic && node->parent && node->parent->content != node) 2026 { 2027 TY_(PFlushLine)( doc, indent ); 2028 } 2029 PPrintTag( doc, mode, indent, node ); 2030 2031 indent = 0; 2032 TY_(PFlushLine)( doc, indent ); 2033 2034 for ( content = node->content; content; content = content->next ) 2035 { 2036 TY_(PPrintTree)( doc, (mode | PREFORMATTED | NOWRAP), 2037 indent, content ); 2038 } 2039 PCondFlushLine( doc, indent ); 2040 indent = indprev; 2041 PPrintEndTag( doc, mode, indent, node ); 2042 2043 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState 2044 && node->next != NULL ) 2045 TY_(PFlushLine)( doc, indent ); 2046 } 2047 else if ( nodeIsSTYLE(node) || nodeIsSCRIPT(node) ) 2048 { 2049 PPrintScriptStyle( doc, (mode | 
PREFORMATTED | NOWRAP | CDATA), 2050 indent, node ); 2051 } 2052 else if ( TY_(nodeCMIsInline)(node) ) 2053 { 2054 if ( cfgBool(doc, TidyMakeClean) ) 2055 { 2056 /* replace <nobr>...</nobr> by or   etc. */ 2057 if ( nodeIsNOBR(node) ) 2058 { 2059 for ( content = node->content; 2060 content != NULL; 2061 content = content->next) 2062 TY_(PPrintTree)( doc, mode|NOWRAP, indent, content ); 2063 return; 2064 } 2065 } 2066 2067 /* otherwise a normal inline element */ 2068 PPrintTag( doc, mode, indent, node ); 2069 2070 /* indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET */ 2071 if ( ShouldIndent(doc, node) ) 2072 { 2073 indent += spaces; 2074 PCondFlushLine( doc, indent ); 2075 2076 for ( content = node->content; 2077 content != NULL; 2078 content = content->next ) 2079 TY_(PPrintTree)( doc, mode, indent, content ); 2080 2081 indent -= spaces; 2082 PCondFlushLine( doc, indent ); 2083 /* PCondFlushLine( doc, indent ); */ 2084 } 2085 else 2086 { 2087 for ( content = node->content; 2088 content != NULL; 2089 content = content->next ) 2090 TY_(PPrintTree)( doc, mode, indent, content ); 2091 } 2092 PPrintEndTag( doc, mode, indent, node ); 2093 } 2094 else /* other tags */ 2095 { 2096 Bool indcont = ( cfgAutoBool(doc, TidyIndentContent) != TidyNoState ); 2097 Bool indsmart = ( cfgAutoBool(doc, TidyIndentContent) == TidyAutoState ); 2098 Bool hideend = cfgBool( doc, TidyHideEndTags ); 2099 Bool classic = cfgBool( doc, TidyVertSpace ); 2100 uint contentIndent = indent; 2101 2102 /* insert extra newline for classic formatting */ 2103 if (classic && node->parent && node->parent->content != node && !nodeIsHTML(node)) 2104 { 2105 TY_(PFlushLine)( doc, indent ); 2106 } 2107 2108 if ( ShouldIndent(doc, node) ) 2109 contentIndent += spaces; 2110 2111 PCondFlushLine( doc, indent ); 2112 if ( indsmart && node->prev != NULL ) 2113 TY_(PFlushLine)( doc, indent ); 2114 2115 /* do not omit elements with attributes */ 2116 if ( !hideend || !TY_(nodeHasCM)(node, CM_OMITST) || 2117 
node->attributes != NULL ) 2118 { 2119 PPrintTag( doc, mode, indent, node ); 2120 2121 if ( ShouldIndent(doc, node) ) 2122 { 2123 /* fix for bug 530791, don't wrap after */ 2124 /* <li> if first child is text node */ 2125 if (!(nodeIsLI(node) && TY_(nodeIsText)(node->content))) 2126 PCondFlushLine( doc, contentIndent ); 2127 } 2128 else if ( TY_(nodeHasCM)(node, CM_HTML) || nodeIsNOFRAMES(node) || 2129 (TY_(nodeHasCM)(node, CM_HEAD) && !nodeIsTITLE(node)) ) 2130 TY_(PFlushLine)( doc, contentIndent ); 2131 } 2132 2133 last = NULL; 2134 for ( content = node->content; content; content = content->next ) 2135 { 2136 /* kludge for naked text before block level tag */ 2137 if ( last && !indcont && TY_(nodeIsText)(last) && 2138 content->tag && !TY_(nodeHasCM)(content, CM_INLINE) ) 2139 { 2140 /* TY_(PFlushLine)(fout, indent); */ 2141 TY_(PFlushLine)( doc, contentIndent ); 2142 } 2143 2144 TY_(PPrintTree)( doc, mode, contentIndent, content ); 2145 last = content; 2146 } 2147 2148 /* don't flush line for td and th */ 2149 if ( ShouldIndent(doc, node) || 2150 ( !hideend && 2151 ( TY_(nodeHasCM)(node, CM_HTML) || 2152 nodeIsNOFRAMES(node) || 2153 (TY_(nodeHasCM)(node, CM_HEAD) && !nodeIsTITLE(node)) 2154 ) 2155 ) 2156 ) 2157 { 2158 PCondFlushLine( doc, indent ); 2159 if ( !hideend || !TY_(nodeHasCM)(node, CM_OPT) ) 2160 { 2161 PPrintEndTag( doc, mode, indent, node ); 2162 /* TY_(PFlushLine)( doc, indent ); */ 2163 } 2164 } 2165 else 2166 { 2167 if ( !hideend || !TY_(nodeHasCM)(node, CM_OPT) ) 2168 { 2169 /* newline before endtag for classic formatting */ 2170 if ( classic && !HasMixedContent(node) ) 2171 TY_(PFlushLine)( doc, indent ); 2172 PPrintEndTag( doc, mode, indent, node ); 2173 } 2174 } 2175 2176 if (!indcont && !hideend && !nodeIsHTML(node) && !classic) 2177 TY_(PFlushLine)( doc, indent ); 2178 else if (classic && node->next != NULL && TY_(nodeHasCM)(node, CM_LIST|CM_DEFLIST|CM_TABLE|CM_BLOCK/*|CM_HEADING*/)) 2179 TY_(PFlushLine)( doc, indent ); 2180 } 2181 } 2182} 
2183 2184void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node ) 2185{ 2186 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut ); 2187 if (node == NULL) 2188 return; 2189 2190 if ( node->type == TextNode) 2191 { 2192 PPrintText( doc, mode, indent, node ); 2193 } 2194 else if ( node->type == CommentTag ) 2195 { 2196 PCondFlushLine( doc, indent ); 2197 PPrintComment( doc, indent, node); 2198 /* PCondFlushLine( doc, 0 ); */ 2199 } 2200 else if ( node->type == RootNode ) 2201 { 2202 Node *content; 2203 for ( content = node->content; 2204 content != NULL; 2205 content = content->next ) 2206 TY_(PPrintXMLTree)( doc, mode, indent, content ); 2207 } 2208 else if ( node->type == DocTypeTag ) 2209 PPrintDocType( doc, indent, node ); 2210 else if ( node->type == ProcInsTag ) 2211 PPrintPI( doc, indent, node ); 2212 else if ( node->type == XmlDecl ) 2213 PPrintXmlDecl( doc, indent, node ); 2214 else if ( node->type == CDATATag ) 2215 PPrintCDATA( doc, indent, node ); 2216 else if ( node->type == SectionTag ) 2217 PPrintSection( doc, indent, node ); 2218 else if ( node->type == AspTag ) 2219 PPrintAsp( doc, indent, node ); 2220 else if ( node->type == JsteTag) 2221 PPrintJste( doc, indent, node ); 2222 else if ( node->type == PhpTag) 2223 PPrintPhp( doc, indent, node ); 2224 else if ( TY_(nodeHasCM)(node, CM_EMPTY) || 2225 (node->type == StartEndTag && !xhtmlOut) ) 2226 { 2227 PCondFlushLine( doc, indent ); 2228 PPrintTag( doc, mode, indent, node ); 2229 /* TY_(PFlushLine)( doc, indent ); */ 2230 } 2231 else /* some kind of container element */ 2232 { 2233 uint spaces = cfg( doc, TidyIndentSpaces ); 2234 Node *content; 2235 Bool mixed = no; 2236 uint cindent; 2237 2238 for ( content = node->content; content; content = content->next ) 2239 { 2240 if ( TY_(nodeIsText)(content) ) 2241 { 2242 mixed = yes; 2243 break; 2244 } 2245 } 2246 2247 PCondFlushLine( doc, indent ); 2248 2249 if ( TY_(XMLPreserveWhiteSpace)(doc, node) ) 2250 { 2251 indent = 0; 2252 mixed = no; 
2253 cindent = 0; 2254 } 2255 else if (mixed) 2256 cindent = indent; 2257 else 2258 cindent = indent + spaces; 2259 2260 PPrintTag( doc, mode, indent, node ); 2261 if ( !mixed && node->content ) 2262 TY_(PFlushLine)( doc, cindent ); 2263 2264 for ( content = node->content; content; content = content->next ) 2265 TY_(PPrintXMLTree)( doc, mode, cindent, content ); 2266 2267 if ( !mixed && node->content ) 2268 PCondFlushLine( doc, indent ); 2269 2270 PPrintEndTag( doc, mode, indent, node ); 2271 /* PCondFlushLine( doc, indent ); */ 2272 } 2273} 2274 2275/* 2276 * local variables: 2277 * mode: c 2278 * indent-tabs-mode: nil 2279 * c-basic-offset: 4 2280 * eval: (c-set-offset 'substatement-open 0) 2281 * end: 2282 */ 2283