1/* 2 tidy.c - HTML TidyLib command line driver 3 4 Copyright (c) 1998-2006 World Wide Web Consortium 5 (Massachusetts Institute of Technology, European Research 6 Consortium for Informatics and Mathematics, Keio University). 7 All Rights Reserved. 8 9 CVS Info : 10 11 $Author$ 12 $Date$ 13 $Revision$ 14*/ 15 16#include "tidy.h" 17 18static FILE* errout = NULL; /* set to stderr */ 19/* static FILE* txtout = NULL; */ /* set to stdout */ 20 21static Bool samefile( ctmbstr filename1, ctmbstr filename2 ) 22{ 23#if FILENAMES_CASE_SENSITIVE 24 return ( strcmp( filename1, filename2 ) == 0 ); 25#else 26 return ( strcasecmp( filename1, filename2 ) == 0 ); 27#endif 28} 29 30static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf) 31{ 32 if (!s) 33 { 34 sbuf[0] = '\0'; 35 return NULL; 36 } 37 else if (strlen(s) <= offset) 38 { 39 strcpy(sbuf,s); 40 sbuf[offset] = '\0'; 41 return NULL; 42 } 43 else 44 { 45 uint j, l, n; 46 j = offset; 47 while(j && s[j] != ' ') 48 --j; 49 l = j; 50 n = j+1; 51 /* no white space */ 52 if (j==0) 53 { 54 l = offset; 55 n = offset; 56 } 57 strncpy(sbuf,s,l); 58 sbuf[l] = '\0'; 59 return s+n; 60 } 61} 62 63static void print2Columns( const char* fmt, uint l1, uint l2, 64 const char *c1, const char *c2 ) 65{ 66 const char *pc1=c1, *pc2=c2; 67 char *c1buf = (char *)malloc(l1+1); 68 char *c2buf = (char *)malloc(l2+1); 69 70 do 71 { 72 pc1 = cutToWhiteSpace(pc1, l1, c1buf); 73 pc2 = cutToWhiteSpace(pc2, l2, c2buf); 74 printf(fmt, 75 c1buf[0]!='\0'?c1buf:"", 76 c2buf[0]!='\0'?c2buf:""); 77 } while (pc1 || pc2); 78 free(c1buf); 79 free(c2buf); 80} 81 82static void print3Columns( const char* fmt, uint l1, uint l2, uint l3, 83 const char *c1, const char *c2, const char *c3 ) 84{ 85 const char *pc1=c1, *pc2=c2, *pc3=c3; 86 char *c1buf = (char *)malloc(l1+1); 87 char *c2buf = (char *)malloc(l2+1); 88 char *c3buf = (char *)malloc(l3+1); 89 90 do 91 { 92 pc1 = cutToWhiteSpace(pc1, l1, c1buf); 93 pc2 = cutToWhiteSpace(pc2, l2, c2buf); 94 pc3 = cutToWhiteSpace(pc3, l3, c3buf); 95 printf(fmt, 96 c1buf[0]!='\0'?c1buf:"", 97 c2buf[0]!='\0'?c2buf:"", 98 c3buf[0]!='\0'?c3buf:""); 99 } while (pc1 || pc2 || pc3); 100 free(c1buf); 101 free(c2buf); 102 free(c3buf); 103} 104 105static const char helpfmt[] = " %-19.19s %-58.58s\n"; 106static const char helpul[] 107 = "-----------------------------------------------------------------"; 108static const char fmt[] = "%-27.27s %-9.9s %-40.40s\n"; 109static const char valfmt[] = "%-27.27s %-9.9s %-1.1s%-39.39s\n"; 110static const char ul[] 111 = "================================================================="; 112 113typedef enum 114{ 115 CmdOptFileManip, 116 CmdOptCatFIRST = CmdOptFileManip, 117 CmdOptProcDir, 118 CmdOptCharEnc, 119 CmdOptMisc, 120 CmdOptCatLAST, 121} CmdOptCategory; 122 123static const struct { 124 ctmbstr mnemonic; 125 ctmbstr name; 126} cmdopt_catname[] = { 127 { "file-manip", "File manipulation" }, 128 { "process-directives", "Processing directives" }, 129 { "char-encoding", "Character encodings" }, 130 { "misc", "Miscellaneous" } 131}; 132 133typedef struct { 134 ctmbstr name1; /**< Name */ 135 ctmbstr desc; /**< Description */ 136 ctmbstr eqconfig; /**< Equivalent configuration option */ 137 CmdOptCategory cat; /**< Category */ 138 ctmbstr name2; /**< Name */ 139 ctmbstr name3; /**< Name */ 140} CmdOptDesc; 141 142static const CmdOptDesc cmdopt_defs[] = { 143 { "-output <file>", 144 "write output to the specified <file>", 145 "output-file: <file>", CmdOptFileManip, "-o <file>" }, 146 { "-config <file>", 147 "set configuration options from the specified <file>", 148 NULL, CmdOptFileManip }, 149 { "-file <file>", 150 "write errors to the specified <file>", 151 "error-file: <file>", CmdOptFileManip, "-f <file>" }, 152 { "-modify", 153 "modify the original input files", 154 "write-back: yes", CmdOptFileManip, "-m" }, 155 { "-indent", 156 "indent element content", 157 "indent: auto", CmdOptProcDir, "-i" }, 158 { "-wrap <column>", 159 "wrap text at the specified <column>" 160 ". 0 is assumed if <column> is missing. " 161 "When this option is omitted, the default of the configuration option " 162 "\"wrap\" applies.", 163 "wrap: <column>", CmdOptProcDir, "-w <column>" }, 164 { "-upper", 165 "force tags to upper case", 166 "uppercase-tags: yes", CmdOptProcDir, "-u" }, 167 { "-clean", 168 "replace FONT, NOBR and CENTER tags by CSS", 169 "clean: yes", CmdOptProcDir, "-c" }, 170 { "-bare", 171 "strip out smart quotes and em dashes, etc.", 172 "bare: yes", CmdOptProcDir, "-b" }, 173 { "-numeric", 174 "output numeric rather than named entities", 175 "numeric-entities: yes", CmdOptProcDir, "-n" }, 176 { "-errors", 177 "only show errors", 178 "markup: no", CmdOptProcDir, "-e" }, 179 { "-quiet", 180 "suppress nonessential output", 181 "quiet: yes", CmdOptProcDir, "-q" }, 182 { "-omit", 183 "omit optional end tags", 184 "hide-endtags: yes", CmdOptProcDir }, 185 { "-xml", 186 "specify the input is well formed XML", 187 "input-xml: yes", CmdOptProcDir }, 188 { "-asxml", 189 "convert HTML to well formed XHTML", 190 "output-xhtml: yes", CmdOptProcDir, "-asxhtml" }, 191 { "-ashtml", 192 "force XHTML to well formed HTML", 193 "output-html: yes", CmdOptProcDir }, 194#if SUPPORT_ACCESSIBILITY_CHECKS 195 { "-access <level>", 196 "do additional accessibility checks (<level> = 0, 1, 2, 3)" 197 ". 0 is assumed if <level> is missing.", 198 "accessibility-check: <level>", CmdOptProcDir }, 199#endif 200 { "-raw", 201 "output values above 127 without conversion to entities", 202 NULL, CmdOptCharEnc }, 203 { "-ascii", 204 "use ISO-8859-1 for input, US-ASCII for output", 205 NULL, CmdOptCharEnc }, 206 { "-latin0", 207 "use ISO-8859-15 for input, US-ASCII for output", 208 NULL, CmdOptCharEnc }, 209 { "-latin1", 210 "use ISO-8859-1 for both input and output", 211 NULL, CmdOptCharEnc }, 212#ifndef NO_NATIVE_ISO2022_SUPPORT 213 { "-iso2022", 214 "use ISO-2022 for both input and output", 215 NULL, CmdOptCharEnc }, 216#endif 217 { "-utf8", 218 "use UTF-8 for both input and output", 219 NULL, CmdOptCharEnc }, 220 { "-mac", 221 "use MacRoman for input, US-ASCII for output", 222 NULL, CmdOptCharEnc }, 223 { "-win1252", 224 "use Windows-1252 for input, US-ASCII for output", 225 NULL, CmdOptCharEnc }, 226 { "-ibm858", 227 "use IBM-858 (CP850+Euro) for input, US-ASCII for output", 228 NULL, CmdOptCharEnc }, 229#if SUPPORT_UTF16_ENCODINGS 230 { "-utf16le", 231 "use UTF-16LE for both input and output", 232 NULL, CmdOptCharEnc }, 233 { "-utf16be", 234 "use UTF-16BE for both input and output", 235 NULL, CmdOptCharEnc }, 236 { "-utf16", 237 "use UTF-16 for both input and output", 238 NULL, CmdOptCharEnc }, 239#endif 240#if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */ 241 { "-big5", 242 "use Big5 for both input and output", 243 NULL, CmdOptCharEnc }, 244 { "-shiftjis", 245 "use Shift_JIS for both input and output", 246 NULL, CmdOptCharEnc }, 247 { "-language <lang>", 248 "set the two-letter language code <lang> (for future use)", 249 "language: <lang>", CmdOptCharEnc }, 250#endif 251 { "-version", 252 "show the version of Tidy", 253 NULL, CmdOptMisc, "-v" }, 254 { "-help", 255 "list the command line options", 256 NULL, CmdOptMisc, "-h", "-?" }, 257 { "-xml-help", 258 "list the command line options in XML format", 259 NULL, CmdOptMisc }, 260 { "-help-config", 261 "list all configuration options", 262 NULL, CmdOptMisc }, 263 { "-xml-config", 264 "list all configuration options in XML format", 265 NULL, CmdOptMisc }, 266 { "-show-config", 267 "list the current configuration settings", 268 NULL, CmdOptMisc }, 269 { NULL, NULL, NULL, CmdOptMisc } 270}; 271 272static tmbstr get_option_names( const CmdOptDesc* pos ) 273{ 274 tmbstr name; 275 uint len = strlen(pos->name1); 276 if (pos->name2) 277 len += 2+strlen(pos->name2); 278 if (pos->name3) 279 len += 2+strlen(pos->name3); 280 281 name = (tmbstr)malloc(len+1); 282 strcpy(name, pos->name1); 283 if (pos->name2) 284 { 285 strcat(name, ", "); 286 strcat(name, pos->name2); 287 } 288 if (pos->name3) 289 { 290 strcat(name, ", "); 291 strcat(name, pos->name3); 292 } 293 return name; 294} 295 296static tmbstr get_escaped_name( ctmbstr name ) 297{ 298 tmbstr escpName; 299 char aux[2]; 300 uint len = 0; 301 ctmbstr c; 302 for(c=name; *c!='\0'; ++c) 303 switch(*c) 304 { 305 case '<': 306 case '>': 307 len += 4; 308 break; 309 case '"': 310 len += 6; 311 break; 312 default: 313 len += 1; 314 break; 315 } 316 317 escpName = (tmbstr)malloc(len+1); 318 escpName[0] = '\0'; 319 320 aux[1] = '\0'; 321 for(c=name; *c!='\0'; ++c) 322 switch(*c) 323 { 324 case '<': 325 strcat(escpName, "<"); 326 break; 327 case '>': 328 strcat(escpName, ">"); 329 break; 330 case '"': 331 strcat(escpName, """); 332 break; 333 default: 334 aux[0] = *c; 335 strcat(escpName, aux); 336 break; 337 } 338 339 return escpName; 340} 341 342static void print_help_option( void ) 343{ 344 CmdOptCategory cat = CmdOptCatFIRST; 345 const CmdOptDesc* pos = cmdopt_defs; 346 347 for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat) 348 { 349 size_t len = strlen(cmdopt_catname[cat].name); 350 printf("%s\n", cmdopt_catname[cat].name ); 351 printf("%*.*s\n", (int)len, (int)len, helpul ); 352 for( pos=cmdopt_defs; pos->name1; ++pos) 353 { 354 tmbstr name; 355 if (pos->cat != cat) 356 continue; 357 name = get_option_names( pos ); 358 print2Columns( helpfmt, 19, 58, name, pos->desc ); 359 free(name); 360 } 361 printf("\n"); 362 } 363} 364 365static void print_xml_help_option_element( ctmbstr element, ctmbstr name ) 366{ 367 tmbstr escpName; 368 if (!name) 369 return; 370 printf(" <%s>%s</%s>\n", element, escpName = get_escaped_name(name), 371 element); 372 free(escpName); 373} 374 375static void print_xml_help_option( void ) 376{ 377 const CmdOptDesc* pos = cmdopt_defs; 378 379 for( pos=cmdopt_defs; pos->name1; ++pos) 380 { 381 printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic ); 382 print_xml_help_option_element("name", pos->name1); 383 print_xml_help_option_element("name", pos->name2); 384 print_xml_help_option_element("name", pos->name3); 385 print_xml_help_option_element("description", pos->desc); 386 if (pos->eqconfig) 387 print_xml_help_option_element("eqconfig", pos->eqconfig); 388 else 389 printf(" <eqconfig />\n"); 390 printf(" </option>\n"); 391 } 392} 393 394static void xml_help( void ) 395{ 396 printf( "<?xml version=\"1.0\"?>\n" 397 "<cmdline version=\"%s\">\n", tidyReleaseDate()); 398 print_xml_help_option(); 399 printf( "</cmdline>\n" ); 400} 401 402static void help( ctmbstr prog ) 403{ 404 printf( "%s [option...] [file...] [option...] [file...]\n", prog ); 405 printf( "Utility to clean up and pretty print HTML/XHTML/XML\n"); 406 printf( "see http://tidy.sourceforge.net/\n"); 407 printf( "\n"); 408 409#ifdef PLATFORM_NAME 410 printf( "Options for HTML Tidy for %s released on %s:\n", 411 PLATFORM_NAME, tidyReleaseDate() ); 412#else 413 printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() ); 414#endif 415 printf( "\n"); 416 417 print_help_option(); 418 419 printf( "Use --blah blarg for any configuration option \"blah\" with argument \"blarg\"\n"); 420 printf( "\n"); 421 422 printf( "Input/Output default to stdin/stdout respectively\n"); 423 printf( "Single letter options apart from -f may be combined\n"); 424 printf( "as in: tidy -f errs.txt -imu foo.html\n"); 425 printf( "For further info on HTML see http://www.w3.org/MarkUp\n"); 426 printf( "\n"); 427} 428 429static Bool isAutoBool( TidyOption topt ) 430{ 431 TidyIterator pos; 432 ctmbstr def; 433 434 if ( tidyOptGetType( topt ) != TidyInteger) 435 return no; 436 437 pos = tidyOptGetPickList( topt ); 438 while ( pos ) 439 { 440 def = tidyOptGetNextPick( topt, &pos ); 441 if (0==strcmp(def,"yes")) 442 return yes; 443 } 444 return no; 445} 446 447static 448ctmbstr ConfigCategoryName( TidyConfigCategory id ) 449{ 450 switch( id ) 451 { 452 case TidyMarkup: 453 return "markup"; 454 case TidyDiagnostics: 455 return "diagnostics"; 456 case TidyPrettyPrint: 457 return "print"; 458 case TidyEncoding: 459 return "encoding"; 460 case TidyMiscellaneous: 461 return "misc"; 462 } 463 fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id); 464 assert(0); 465 abort(); 466} 467 468/* Description of an option */ 469typedef struct { 470 ctmbstr name; /**< Name */ 471 ctmbstr cat; /**< Category */ 472 ctmbstr type; /**< "String, ... */ 473 ctmbstr vals; /**< Potential values. If NULL, use an external function */ 474 ctmbstr def; /**< default */ 475 tmbchar tempdefs[80]; /**< storage for default such as integer */ 476 Bool haveVals; /**< if yes, vals is valid */ 477} OptionDesc; 478 479typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * ); 480 481 482/* Create description "d" related to "opt" */ 483static 484void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d ) 485{ 486 TidyOptionId optId = tidyOptGetId( topt ); 487 TidyOptionType optTyp = tidyOptGetType( topt ); 488 489 d->name = tidyOptGetName( topt ); 490 d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) ); 491 d->vals = NULL; 492 d->def = NULL; 493 d->haveVals = yes; 494 495 /* Handle special cases first. 496 */ 497 switch ( optId ) 498 { 499 case TidyDuplicateAttrs: 500 case TidyNewline: 501 case TidyAccessibilityCheckLevel: 502 d->type = "enum"; 503 d->vals = NULL; 504 d->def = 505 optId==TidyNewline ? 506 "<em>Platform dependent</em>" 507 :tidyOptGetCurrPick( tdoc, optId ); 508 break; 509 510 case TidyDoctype: 511 d->type = "DocType"; 512 d->vals = NULL; 513 { 514 ctmbstr sdef = NULL; 515 sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode ); 516 if ( !sdef || *sdef == '*' ) 517 sdef = tidyOptGetValue( tdoc, TidyDoctype ); 518 d->def = sdef; 519 } 520 break; 521 522 case TidyInlineTags: 523 case TidyBlockTags: 524 case TidyEmptyTags: 525 case TidyPreTags: 526 d->type = "Tag names"; 527 d->vals = "tagX, tagY, ..."; 528 d->def = NULL; 529 break; 530 531 case TidyCharEncoding: 532 case TidyInCharEncoding: 533 case TidyOutCharEncoding: 534 d->type = "Encoding"; 535 d->def = tidyOptGetEncName( tdoc, optId ); 536 if (!d->def) 537 d->def = "?"; 538 d->vals = NULL; 539 break; 540 541 /* General case will handle remaining */ 542 default: 543 switch ( optTyp ) 544 { 545 case TidyBoolean: 546 d->type = "Boolean"; 547 d->vals = "y/n, yes/no, t/f, true/false, 1/0"; 548 d->def = tidyOptGetCurrPick( tdoc, optId ); 549 break; 550 551 case TidyInteger: 552 if (isAutoBool(topt)) 553 { 554 d->type = "AutoBool"; 555 d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0"; 556 d->def = tidyOptGetCurrPick( tdoc, optId ); 557 } 558 else 559 { 560 uint idef; 561 d->type = "Integer"; 562 if ( optId == TidyWrapLen ) 563 d->vals = "0 (no wrapping), 1, 2, ..."; 564 else 565 d->vals = "0, 1, 2, ..."; 566 567 idef = tidyOptGetInt( tdoc, optId ); 568 sprintf(d->tempdefs, "%u", idef); 569 d->def = d->tempdefs; 570 } 571 break; 572 573 case TidyString: 574 d->type = "String"; 575 d->vals = NULL; 576 d->haveVals = no; 577 d->def = tidyOptGetValue( tdoc, optId ); 578 break; 579 } 580 } 581} 582 583/* Array holding all options. Contains a trailing sentinel. */ 584typedef struct { 585 TidyOption topt[N_TIDY_OPTIONS]; 586} AllOption_t; 587 588static 589int cmpOpt(const void* e1_, const void *e2_) 590{ 591 const TidyOption* e1 = (const TidyOption*)e1_; 592 const TidyOption* e2 = (const TidyOption*)e2_; 593 return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2)); 594} 595 596static 597void getSortedOption( TidyDoc tdoc, AllOption_t *tOption ) 598{ 599 TidyIterator pos = tidyGetOptionList( tdoc ); 600 uint i = 0; 601 602 while ( pos ) 603 { 604 TidyOption topt = tidyGetNextOption( tdoc, &pos ); 605 tOption->topt[i] = topt; 606 ++i; 607 } 608 tOption->topt[i] = NULL; /* sentinel */ 609 610 qsort(tOption->topt, 611 /* Do not sort the sentinel: hence `-1' */ 612 sizeof(tOption->topt)/sizeof(tOption->topt[0])-1, 613 sizeof(tOption->topt[0]), 614 cmpOpt); 615} 616 617static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint ) 618{ 619 AllOption_t tOption; 620 const TidyOption *topt; 621 622 getSortedOption( tdoc, &tOption ); 623 for( topt = tOption.topt; *topt; ++topt) 624 { 625 OptionDesc d; 626 627 GetOption( tdoc, *topt, &d ); 628 (*OptionPrint)( tdoc, *topt, &d ); 629 } 630} 631 632static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint ) 633{ 634 TidyIterator pos = tidyGetOptionList( tdoc ); 635 636 while ( pos ) 637 { 638 TidyOption topt = tidyGetNextOption( tdoc, &pos ); 639 OptionDesc d; 640 641 GetOption( tdoc, topt, &d ); 642 (*OptionPrint)( tdoc, topt, &d ); 643 } 644} 645 646static 647void PrintAllowedValuesFromPick( TidyOption topt ) 648{ 649 TidyIterator pos = tidyOptGetPickList( topt ); 650 Bool first = yes; 651 ctmbstr def; 652 while ( pos ) 653 { 654 if (first) 655 first = no; 656 else 657 printf(", "); 658 def = tidyOptGetNextPick( topt, &pos ); 659 printf("%s", def); 660 } 661} 662 663static 664void PrintAllowedValues( TidyOption topt, const OptionDesc *d ) 665{ 666 if (d->vals) 667 printf( "%s", d->vals ); 668 else 669 PrintAllowedValuesFromPick( topt ); 670} 671 672static 673void printXMLDescription( TidyDoc tdoc, TidyOption topt ) 674{ 675 ctmbstr doc = tidyOptGetDoc( tdoc, topt ); 676 677 if (doc) 678 printf(" <description>%s</description>\n", doc); 679 else 680 { 681 printf(" <description />\n"); 682 fprintf(stderr, "Warning: option `%s' is not documented.\n", 683 tidyOptGetName( topt )); 684 } 685} 686 687static 688void printXMLCrossRef( TidyDoc tdoc, TidyOption topt ) 689{ 690 TidyOption optLinked; 691 TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt); 692 while( pos ) 693 { 694 optLinked = tidyOptGetNextDocLinks(tdoc, &pos ); 695 printf(" <seealso>%s</seealso>\n",tidyOptGetName(optLinked)); 696 } 697} 698 699static 700void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d ) 701{ 702 if ( tidyOptIsReadOnly(topt) ) 703 return; 704 705 printf( " <option class=\"%s\">\n", d->cat ); 706 printf (" <name>%s</name>\n",d->name); 707 printf (" <type>%s</type>\n",d->type); 708 if (d->def) 709 printf(" <default>%s</default>\n",d->def); 710 else 711 printf(" <default />\n"); 712 if (d->haveVals) 713 { 714 printf(" <example>"); 715 PrintAllowedValues( topt, d ); 716 printf("</example>\n"); 717 } 718 else 719 { 720 printf(" <example />\n"); 721 } 722 printXMLDescription( tdoc, topt ); 723 printXMLCrossRef( tdoc, topt ); 724 printf( " </option>\n" ); 725} 726 727static void XMLoptionhelp( TidyDoc tdoc ) 728{ 729 printf( "<?xml version=\"1.0\"?>\n" 730 "<config version=\"%s\">\n", tidyReleaseDate()); 731 ForEachOption( tdoc, printXMLOption ); 732 printf( "</config>\n" ); 733} 734 735static 736tmbstr GetAllowedValuesFromPick( TidyOption topt ) 737{ 738 TidyIterator pos; 739 Bool first; 740 ctmbstr def; 741 uint len = 0; 742 tmbstr val; 743 744 pos = tidyOptGetPickList( topt ); 745 first = yes; 746 while ( pos ) 747 { 748 if (first) 749 first = no; 750 else 751 len += 2; 752 def = tidyOptGetNextPick( topt, &pos ); 753 len += strlen(def); 754 } 755 val = (tmbstr)malloc(len+1); 756 val[0] = '\0'; 757 pos = tidyOptGetPickList( topt ); 758 first = yes; 759 while ( pos ) 760 { 761 if (first) 762 first = no; 763 else 764 strcat(val, ", "); 765 def = tidyOptGetNextPick( topt, &pos ); 766 strcat(val, def); 767 } 768 return val; 769} 770 771static 772tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d ) 773{ 774 if (d->vals) 775 { 776 tmbstr val = (tmbstr)malloc(1+strlen(d->vals)); 777 strcpy(val, d->vals); 778 return val; 779 } 780 else 781 return GetAllowedValuesFromPick( topt ); 782} 783 784static 785void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt, 786 OptionDesc *d ) 787{ 788 if ( tidyOptIsReadOnly(topt) ) 789 return; 790 791 if ( *d->name || *d->type ) 792 { 793 ctmbstr pval = d->vals; 794 tmbstr val = NULL; 795 if (!d->haveVals) 796 { 797 pval = "-"; 798 } 799 else if (pval == NULL) 800 { 801 val = GetAllowedValues( topt, d); 802 pval = val; 803 } 804 print3Columns( fmt, 27, 9, 40, d->name, d->type, pval ); 805 if (val) 806 free(val); 807 } 808} 809 810static void optionhelp( TidyDoc tdoc ) 811{ 812 printf( "\nHTML Tidy Configuration Settings\n\n" ); 813 printf( "Within a file, use the form:\n\n" ); 814 printf( "wrap: 72\n" ); 815 printf( "indent: no\n\n" ); 816 printf( "When specified on the command line, use the form:\n\n" ); 817 printf( "--wrap 72 --indent no\n\n"); 818 819 printf( fmt, "Name", "Type", "Allowable values" ); 820 printf( fmt, ul, ul, ul ); 821 822 ForEachSortedOption( tdoc, printOption ); 823} 824 825static 826void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt, 827 OptionDesc *d ) 828{ 829 TidyOptionId optId = tidyOptGetId( topt ); 830 ctmbstr ro = tidyOptIsReadOnly( topt ) ? "*" : "" ; 831 832 switch ( optId ) 833 { 834 case TidyInlineTags: 835 case TidyBlockTags: 836 case TidyEmptyTags: 837 case TidyPreTags: 838 { 839 TidyIterator pos = tidyOptGetDeclTagList( tdoc ); 840 while ( pos ) 841 { 842 d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos); 843 if ( pos ) 844 { 845 if ( *d->name ) 846 printf( valfmt, d->name, d->type, ro, d->def ); 847 else 848 printf( fmt, d->name, d->type, d->def ); 849 d->name = ""; 850 d->type = ""; 851 } 852 } 853 } 854 break; 855 case TidyNewline: 856 d->def = tidyOptGetCurrPick( tdoc, optId ); 857 break; 858 } 859 860 /* fix for http://tidy.sf.net/bug/873921 */ 861 if ( *d->name || *d->type || (d->def && *d->def) ) 862 { 863 if ( ! d->def ) 864 d->def = ""; 865 if ( *d->name ) 866 printf( valfmt, d->name, d->type, ro, d->def ); 867 else 868 printf( fmt, d->name, d->type, d->def ); 869 } 870} 871 872static void optionvalues( TidyDoc tdoc ) 873{ 874 printf( "\nConfiguration File Settings:\n\n" ); 875 printf( fmt, "Name", "Type", "Current Value" ); 876 printf( fmt, ul, ul, ul ); 877 878 ForEachSortedOption( tdoc, printOptionValues ); 879 880 printf( "\n\nValues marked with an *asterisk are calculated \n" 881 "internally by HTML Tidy\n\n" ); 882} 883 884static void version( void ) 885{ 886#ifdef PLATFORM_NAME 887 printf( "HTML Tidy for %s released on %s\n", 888 PLATFORM_NAME, tidyReleaseDate() ); 889#else 890 printf( "HTML Tidy released on %s\n", tidyReleaseDate() ); 891#endif 892} 893 894static void unknownOption( uint c ) 895{ 896 fprintf( errout, "HTML Tidy: unknown option: %c\n", (char)c ); 897} 898 899int main( int argc, char** argv ) 900{ 901 ctmbstr prog = argv[0]; 902 ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL; 903 TidyDoc tdoc = tidyCreate(); 904 int status = 0; 905 906 uint contentErrors = 0; 907 uint contentWarnings = 0; 908 uint accessWarnings = 0; 909 910 errout = stderr; /* initialize to stderr */ 911 status = 0; 912 913#ifdef CONFIG_FILE 914 if ( tidyFileExists(CONFIG_FILE) ) 915 { 916 status = tidyLoadConfig( tdoc, CONFIG_FILE ); 917 if ( status != 0 ) 918 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", CONFIG_FILE, status); 919 } 920#endif /* CONFIG_FILE */ 921 922 /* look for env var "HTML_TIDY" */ 923 /* then for ~/.tidyrc (on platforms defining $HOME) */ 924 925 if ( cfgfil = getenv("HTML_TIDY") ) 926 { 927 status = tidyLoadConfig( tdoc, cfgfil ); 928 if ( status != 0 ) 929 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status); 930 } 931#ifdef USER_CONFIG_FILE 932 else if ( tidyFileExists(USER_CONFIG_FILE) ) 933 { 934 status = tidyLoadConfig( tdoc, USER_CONFIG_FILE ); 935 if ( status != 0 ) 936 fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", USER_CONFIG_FILE, status); 937 } 938#endif /* USER_CONFIG_FILE */ 939 940 /* read command line */ 941 while ( argc > 0 ) 942 { 943 if (argc > 1 && argv[1][0] == '-') 944 { 945 /* support -foo and --foo */ 946 ctmbstr arg = argv[1] + 1; 947 948 if ( strcasecmp(arg, "xml") == 0) 949 tidyOptSetBool( tdoc, TidyXmlTags, yes ); 950 951 else if ( strcasecmp(arg, "asxml") == 0 || 952 strcasecmp(arg, "asxhtml") == 0 ) 953 { 954 tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); 955 } 956 else if ( strcasecmp(arg, "ashtml") == 0 ) 957 tidyOptSetBool( tdoc, TidyHtmlOut, yes ); 958 959 else if ( strcasecmp(arg, "indent") == 0 ) 960 { 961 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState ); 962 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 ) 963 tidyOptResetToDefault( tdoc, TidyIndentSpaces ); 964 } 965 else if ( strcasecmp(arg, "omit") == 0 ) 966 tidyOptSetBool( tdoc, TidyHideEndTags, yes ); 967 968 else if ( strcasecmp(arg, "upper") == 0 ) 969 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); 970 971 else if ( strcasecmp(arg, "clean") == 0 ) 972 tidyOptSetBool( tdoc, TidyMakeClean, yes ); 973 974 else if ( strcasecmp(arg, "bare") == 0 ) 975 tidyOptSetBool( tdoc, TidyMakeBare, yes ); 976 977 else if ( strcasecmp(arg, "raw") == 0 || 978 strcasecmp(arg, "ascii") == 0 || 979 strcasecmp(arg, "latin0") == 0 || 980 strcasecmp(arg, "latin1") == 0 || 981 strcasecmp(arg, "utf8") == 0 || 982#ifndef NO_NATIVE_ISO2022_SUPPORT 983 strcasecmp(arg, "iso2022") == 0 || 984#endif 985#if SUPPORT_UTF16_ENCODINGS 986 strcasecmp(arg, "utf16le") == 0 || 987 strcasecmp(arg, "utf16be") == 0 || 988 strcasecmp(arg, "utf16") == 0 || 989#endif 990#if SUPPORT_ASIAN_ENCODINGS 991 strcasecmp(arg, "shiftjis") == 0 || 992 strcasecmp(arg, "big5") == 0 || 993#endif 994 strcasecmp(arg, "mac") == 0 || 995 strcasecmp(arg, "win1252") == 0 || 996 strcasecmp(arg, "ibm858") == 0 ) 997 { 998 tidySetCharEncoding( tdoc, arg ); 999 } 1000 else if ( strcasecmp(arg, "numeric") == 0 ) 1001 tidyOptSetBool( tdoc, TidyNumEntities, yes ); 1002 1003 else if ( strcasecmp(arg, "modify") == 0 || 1004 strcasecmp(arg, "change") == 0 || /* obsolete */ 1005 strcasecmp(arg, "update") == 0 ) /* obsolete */ 1006 { 1007 tidyOptSetBool( tdoc, TidyWriteBack, yes ); 1008 } 1009 else if ( strcasecmp(arg, "errors") == 0 ) 1010 tidyOptSetBool( tdoc, TidyShowMarkup, no ); 1011 1012 else if ( strcasecmp(arg, "quiet") == 0 ) 1013 tidyOptSetBool( tdoc, TidyQuiet, yes ); 1014 1015 else if ( strcasecmp(arg, "help") == 0 || 1016 strcasecmp(arg, "h") == 0 || *arg == '?' ) 1017 { 1018 help( prog ); 1019 tidyRelease( tdoc ); 1020 return 0; /* success */ 1021 } 1022 else if ( strcasecmp(arg, "xml-help") == 0) 1023 { 1024 xml_help( ); 1025 tidyRelease( tdoc ); 1026 return 0; /* success */ 1027 } 1028 else if ( strcasecmp(arg, "help-config") == 0 ) 1029 { 1030 optionhelp( tdoc ); 1031 tidyRelease( tdoc ); 1032 return 0; /* success */ 1033 } 1034 else if ( strcasecmp(arg, "xml-config") == 0 ) 1035 { 1036 XMLoptionhelp( tdoc ); 1037 tidyRelease( tdoc ); 1038 return 0; /* success */ 1039 } 1040 else if ( strcasecmp(arg, "show-config") == 0 ) 1041 { 1042 optionvalues( tdoc ); 1043 tidyRelease( tdoc ); 1044 return 0; /* success */ 1045 } 1046 else if ( strcasecmp(arg, "config") == 0 ) 1047 { 1048 if ( argc >= 3 ) 1049 { 1050 ctmbstr post; 1051 1052 tidyLoadConfig( tdoc, argv[2] ); 1053 1054 /* Set new error output stream if setting changed */ 1055 post = tidyOptGetValue( tdoc, TidyErrFile ); 1056 if ( post && (!errfil || !samefile(errfil, post)) ) 1057 { 1058 errfil = post; 1059 errout = tidySetErrorFile( tdoc, post ); 1060 } 1061 1062 --argc; 1063 ++argv; 1064 } 1065 } 1066 1067#if SUPPORT_ASIAN_ENCODINGS 1068 else if ( strcasecmp(arg, "language") == 0 || 1069 strcasecmp(arg, "lang") == 0 ) 1070 { 1071 if ( argc >= 3 ) 1072 { 1073 tidyOptSetValue( tdoc, TidyLanguage, argv[2] ); 1074 --argc; 1075 ++argv; 1076 } 1077 } 1078#endif 1079 1080 else if ( strcasecmp(arg, "output") == 0 || 1081 strcasecmp(arg, "-output-file") == 0 || 1082 strcasecmp(arg, "o") == 0 ) 1083 { 1084 if ( argc >= 3 ) 1085 { 1086 tidyOptSetValue( tdoc, TidyOutFile, argv[2] ); 1087 --argc; 1088 ++argv; 1089 } 1090 } 1091 else if ( strcasecmp(arg, "file") == 0 || 1092 strcasecmp(arg, "-file") == 0 || 1093 strcasecmp(arg, "f") == 0 ) 1094 { 1095 if ( argc >= 3 ) 1096 { 1097 errfil = argv[2]; 1098 errout = tidySetErrorFile( tdoc, errfil ); 1099 --argc; 1100 ++argv; 1101 } 1102 } 1103 else if ( strcasecmp(arg, "wrap") == 0 || 1104 strcasecmp(arg, "-wrap") == 0 || 1105 strcasecmp(arg, "w") == 0 ) 1106 { 1107 if ( argc >= 3 ) 1108 { 1109 uint wraplen = 0; 1110 int nfields = sscanf( argv[2], "%u", &wraplen ); 1111 tidyOptSetInt( tdoc, TidyWrapLen, wraplen ); 1112 if (nfields > 0) 1113 { 1114 --argc; 1115 ++argv; 1116 } 1117 } 1118 } 1119 else if ( strcasecmp(arg, "version") == 0 || 1120 strcasecmp(arg, "-version") == 0 || 1121 strcasecmp(arg, "v") == 0 ) 1122 { 1123 version(); 1124 tidyRelease( tdoc ); 1125 return 0; /* success */ 1126 1127 } 1128 else if ( strncmp(argv[1], "--", 2 ) == 0) 1129 { 1130 if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) ) 1131 { 1132 /* Set new error output stream if setting changed */ 1133 ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile ); 1134 if ( post && (!errfil || !samefile(errfil, post)) ) 1135 { 1136 errfil = post; 1137 errout = tidySetErrorFile( tdoc, post ); 1138 } 1139 1140 ++argv; 1141 --argc; 1142 } 1143 } 1144 1145#if SUPPORT_ACCESSIBILITY_CHECKS 1146 else if ( strcasecmp(arg, "access") == 0 ) 1147 { 1148 if ( argc >= 3 ) 1149 { 1150 uint acclvl = 0; 1151 int nfields = sscanf( argv[2], "%u", &acclvl ); 1152 tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl ); 1153 if (nfields > 0) 1154 { 1155 --argc; 1156 ++argv; 1157 } 1158 } 1159 } 1160#endif 1161 1162 else 1163 { 1164 uint c; 1165 ctmbstr s = argv[1]; 1166 1167 while ( c = *++s ) 1168 { 1169 switch ( c ) 1170 { 1171 case 'i': 1172 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState ); 1173 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 ) 1174 tidyOptResetToDefault( tdoc, TidyIndentSpaces ); 1175 break; 1176 1177 /* Usurp -o for output file. Anyone hiding end tags? 1178 case 'o': 1179 tidyOptSetBool( tdoc, TidyHideEndTags, yes ); 1180 break; 1181 */ 1182 1183 case 'u': 1184 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes ); 1185 break; 1186 1187 case 'c': 1188 tidyOptSetBool( tdoc, TidyMakeClean, yes ); 1189 break; 1190 1191 case 'b': 1192 tidyOptSetBool( tdoc, TidyMakeBare, yes ); 1193 break; 1194 1195 case 'n': 1196 tidyOptSetBool( tdoc, TidyNumEntities, yes ); 1197 break; 1198 1199 case 'm': 1200 tidyOptSetBool( tdoc, TidyWriteBack, yes ); 1201 break; 1202 1203 case 'e': 1204 tidyOptSetBool( tdoc, TidyShowMarkup, no ); 1205 break; 1206 1207 case 'q': 1208 tidyOptSetBool( tdoc, TidyQuiet, yes ); 1209 break; 1210 1211 default: 1212 unknownOption( c ); 1213 break; 1214 } 1215 } 1216 } 1217 1218 --argc; 1219 ++argv; 1220 continue; 1221 } 1222 1223 if ( argc > 1 ) 1224 { 1225 htmlfil = argv[1]; 1226 if ( tidyOptGetBool(tdoc, TidyEmacs) ) 1227 tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil ); 1228 status = tidyParseFile( tdoc, htmlfil ); 1229 } 1230 else 1231 { 1232 htmlfil = "stdin"; 1233 status = tidyParseStdin( tdoc ); 1234 } 1235 1236 if ( status >= 0 ) 1237 status = tidyCleanAndRepair( tdoc ); 1238 1239 if ( status >= 0 ) 1240 status = tidyRunDiagnostics( tdoc ); 1241 1242 if ( status > 1 ) /* If errors, do we want to force output? */ 1243 status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 ); 1244 1245 if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) ) 1246 { 1247 if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 ) 1248 status = tidySaveFile( tdoc, htmlfil ); 1249 else 1250 { 1251 ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile ); 1252 if ( outfil ) 1253 status = tidySaveFile( tdoc, outfil ); 1254 else 1255 status = tidySaveStdout( tdoc ); 1256 } 1257 } 1258 1259 contentErrors += tidyErrorCount( tdoc ); 1260 contentWarnings += tidyWarningCount( tdoc ); 1261 accessWarnings += tidyAccessWarningCount( tdoc ); 1262 1263 --argc; 1264 ++argv; 1265 1266 if ( argc <= 1 ) 1267 break; 1268 } 1269 1270 if (!tidyOptGetBool(tdoc, TidyQuiet) && 1271 errout == stderr && !contentErrors) 1272 fprintf(errout, "\n"); 1273 1274 if (contentErrors + contentWarnings > 0 && 1275 !tidyOptGetBool(tdoc, TidyQuiet)) 1276 tidyErrorSummary(tdoc); 1277 1278 if (!tidyOptGetBool(tdoc, TidyQuiet)) 1279 tidyGeneralInfo(tdoc); 1280 1281 /* called to free hash tables etc. */ 1282 tidyRelease( tdoc ); 1283 1284 /* return status can be used by scripts */ 1285 if ( contentErrors > 0 ) 1286 return 2; 1287 1288 if ( contentWarnings > 0 ) 1289 return 1; 1290 1291 /* 0 signifies all is ok */ 1292 return 0; 1293} 1294 1295/* 1296 * local variables: 1297 * mode: c 1298 * indent-tabs-mode: nil 1299 * c-basic-offset: 4 1300 * eval: (c-set-offset 'substatement-open 0) 1301 * end: 1302 */ 1303