1/*
2  tidy.c - HTML TidyLib command line driver
3
4  Copyright (c) 1998-2006 World Wide Web Consortium
5  (Massachusetts Institute of Technology, European Research
6  Consortium for Informatics and Mathematics, Keio University).
7  All Rights Reserved.
8
9  CVS Info :
10
11    $Author$
12    $Date$
13    $Revision$
14*/
15
16#include "tidy.h"
17
/* Stream that receives this driver's own diagnostics.  It starts out
   NULL; main() points it at stderr before any option is processed. */
static FILE *errout = NULL;
/* static FILE* txtout = NULL; */  /* set to stdout */
20
21static Bool samefile( ctmbstr filename1, ctmbstr filename2 )
22{
23#if FILENAMES_CASE_SENSITIVE
24    return ( strcmp( filename1, filename2 ) == 0 );
25#else
26    return ( strcasecmp( filename1, filename2 ) == 0 );
27#endif
28}
29
/*
 * Copy the next column-sized piece of `s' into `sbuf' and return a
 * pointer to the text that is still to be printed.
 *
 *   s       text to split; may be NULL
 *   offset  column width; `sbuf' must hold at least offset+1 bytes
 *   sbuf    receives the NUL-terminated piece
 *
 * When `s' fits in `offset' characters it is copied whole.  Otherwise
 * the text is cut at the last blank found at or before index `offset'
 * (the blank itself is dropped), or hard-cut at `offset' characters when
 * no such blank exists.
 *
 * Returns NULL once nothing is left to print: `s' was NULL, the rest of
 * it fitted, or `offset' is 0.  A zero-width column can never consume
 * any text, so it stops immediately instead of handing the same pointer
 * back to the caller forever.
 */
static const char *cutToWhiteSpace(const char *s, uint offset, char *sbuf)
{
    size_t len, cut, next;

    if (!s || offset == 0)
    {
        sbuf[0] = '\0';
        return NULL;
    }

    len = strlen(s);
    if (len <= offset)
    {
        /* Everything fits: copy the text together with its terminator. */
        memcpy(sbuf, s, len + 1);
        return NULL;
    }

    /* Walk back from the column limit to the nearest blank. */
    cut = offset;
    while (cut && s[cut] != ' ')
        --cut;

    if (cut == 0)
    {
        /* no white space: cut in the middle of the word */
        cut = offset;
        next = offset;
    }
    else
    {
        /* resume after the blank that was used as the break point */
        next = cut + 1;
    }

    memcpy(sbuf, s, cut);
    sbuf[cut] = '\0';
    return s + next;
}
62
63static void print2Columns( const char* fmt, uint l1, uint l2,
64                           const char *c1, const char *c2 )
65{
66    const char *pc1=c1, *pc2=c2;
67    char *c1buf = (char *)malloc(l1+1);
68    char *c2buf = (char *)malloc(l2+1);
69
70    do
71    {
72        pc1 = cutToWhiteSpace(pc1, l1, c1buf);
73        pc2 = cutToWhiteSpace(pc2, l2, c2buf);
74        printf(fmt,
75               c1buf[0]!='\0'?c1buf:"",
76               c2buf[0]!='\0'?c2buf:"");
77    } while (pc1 || pc2);
78    free(c1buf);
79    free(c2buf);
80}
81
82static void print3Columns( const char* fmt, uint l1, uint l2, uint l3,
83                           const char *c1, const char *c2, const char *c3 )
84{
85    const char *pc1=c1, *pc2=c2, *pc3=c3;
86    char *c1buf = (char *)malloc(l1+1);
87    char *c2buf = (char *)malloc(l2+1);
88    char *c3buf = (char *)malloc(l3+1);
89
90    do
91    {
92        pc1 = cutToWhiteSpace(pc1, l1, c1buf);
93        pc2 = cutToWhiteSpace(pc2, l2, c2buf);
94        pc3 = cutToWhiteSpace(pc3, l3, c3buf);
95        printf(fmt,
96               c1buf[0]!='\0'?c1buf:"",
97               c2buf[0]!='\0'?c2buf:"",
98               c3buf[0]!='\0'?c3buf:"");
99    } while (pc1 || pc2 || pc3);
100    free(c1buf);
101    free(c2buf);
102    free(c3buf);
103}
104
/* Row layout of the command line help: option names, then description. */
static const char helpfmt[] = " %-19.19s %-58.58s\n";

/* Dashes used to underline the section titles of the command line help. */
static const char helpul[] =
    "-----------------------------------------------------------------";

/* Row layout of the configuration listings: name, type, value. */
static const char fmt[] = "%-27.27s %-9.9s  %-40.40s\n";

/* Same layout as `fmt' with a one-character marker column ahead of the
   value. */
static const char valfmt[] = "%-27.27s %-9.9s %-1.1s%-39.39s\n";

/* Rule printed below the column headings of the configuration listings. */
static const char ul[] =
    "=================================================================";
112
/* Sections of the command line help.  The values index cmdopt_catname[];
   CmdOptCatFIRST and CmdOptCatLAST bound the loop over all sections. */
typedef enum
{
  CmdOptFileManip = 0,
  CmdOptCatFIRST  = CmdOptFileManip,
  CmdOptProcDir   = 1,
  CmdOptCharEnc   = 2,
  CmdOptMisc      = 3,
  CmdOptCatLAST   = 4
} CmdOptCategory;
122
123static const struct {
124    ctmbstr mnemonic;
125    ctmbstr name;
126} cmdopt_catname[] = {
127    { "file-manip", "File manipulation" },
128    { "process-directives", "Processing directives" },
129    { "char-encoding", "Character encodings" },
130    { "misc", "Miscellaneous" }
131};
132
133typedef struct {
134    ctmbstr name1;      /**< Name */
135    ctmbstr desc;       /**< Description */
136    ctmbstr eqconfig;   /**< Equivalent configuration option */
137    CmdOptCategory cat; /**< Category */
138    ctmbstr name2;      /**< Name */
139    ctmbstr name3;      /**< Name */
140} CmdOptDesc;
141
142static const CmdOptDesc cmdopt_defs[] =  {
143    { "-output <file>",
144      "write output to the specified <file>",
145      "output-file: <file>", CmdOptFileManip, "-o <file>" },
146    { "-config <file>",
147      "set configuration options from the specified <file>",
148      NULL, CmdOptFileManip },
149    { "-file <file>",
150      "write errors to the specified <file>",
151      "error-file: <file>", CmdOptFileManip, "-f <file>" },
152    { "-modify",
153      "modify the original input files",
154      "write-back: yes", CmdOptFileManip, "-m" },
155    { "-indent",
156      "indent element content",
157      "indent: auto", CmdOptProcDir, "-i" },
158    { "-wrap <column>",
159      "wrap text at the specified <column>"
160      ". 0 is assumed if <column> is missing. "
161      "When this option is omitted, the default of the configuration option "
162      "\"wrap\" applies.",
163      "wrap: <column>", CmdOptProcDir, "-w <column>" },
164    { "-upper",
165      "force tags to upper case",
166      "uppercase-tags: yes", CmdOptProcDir, "-u" },
167    { "-clean",
168      "replace FONT, NOBR and CENTER tags by CSS",
169      "clean: yes", CmdOptProcDir, "-c" },
170    { "-bare",
171      "strip out smart quotes and em dashes, etc.",
172      "bare: yes", CmdOptProcDir, "-b" },
173    { "-numeric",
174      "output numeric rather than named entities",
175      "numeric-entities: yes", CmdOptProcDir, "-n" },
176    { "-errors",
177      "only show errors",
178      "markup: no", CmdOptProcDir, "-e" },
179    { "-quiet",
180      "suppress nonessential output",
181      "quiet: yes", CmdOptProcDir, "-q" },
182    { "-omit",
183      "omit optional end tags",
184      "hide-endtags: yes", CmdOptProcDir },
185    { "-xml",
186      "specify the input is well formed XML",
187      "input-xml: yes", CmdOptProcDir },
188    { "-asxml",
189      "convert HTML to well formed XHTML",
190      "output-xhtml: yes", CmdOptProcDir, "-asxhtml" },
191    { "-ashtml",
192      "force XHTML to well formed HTML",
193      "output-html: yes", CmdOptProcDir },
194#if SUPPORT_ACCESSIBILITY_CHECKS
195    { "-access <level>",
196      "do additional accessibility checks (<level> = 0, 1, 2, 3)"
197      ". 0 is assumed if <level> is missing.",
198      "accessibility-check: <level>", CmdOptProcDir },
199#endif
200    { "-raw",
201      "output values above 127 without conversion to entities",
202      NULL, CmdOptCharEnc },
203    { "-ascii",
204      "use ISO-8859-1 for input, US-ASCII for output",
205      NULL, CmdOptCharEnc },
206    { "-latin0",
207      "use ISO-8859-15 for input, US-ASCII for output",
208      NULL, CmdOptCharEnc },
209    { "-latin1",
210      "use ISO-8859-1 for both input and output",
211      NULL, CmdOptCharEnc },
212#ifndef NO_NATIVE_ISO2022_SUPPORT
213    { "-iso2022",
214      "use ISO-2022 for both input and output",
215      NULL, CmdOptCharEnc },
216#endif
217    { "-utf8",
218      "use UTF-8 for both input and output",
219      NULL, CmdOptCharEnc },
220    { "-mac",
221      "use MacRoman for input, US-ASCII for output",
222      NULL, CmdOptCharEnc },
223    { "-win1252",
224      "use Windows-1252 for input, US-ASCII for output",
225      NULL, CmdOptCharEnc },
226    { "-ibm858",
227      "use IBM-858 (CP850+Euro) for input, US-ASCII for output",
228      NULL, CmdOptCharEnc },
229#if SUPPORT_UTF16_ENCODINGS
230    { "-utf16le",
231      "use UTF-16LE for both input and output",
232      NULL, CmdOptCharEnc },
233    { "-utf16be",
234      "use UTF-16BE for both input and output",
235      NULL, CmdOptCharEnc },
236    { "-utf16",
237      "use UTF-16 for both input and output",
238      NULL, CmdOptCharEnc },
239#endif
240#if SUPPORT_ASIAN_ENCODINGS /* #431953 - RJ */
241    { "-big5",
242      "use Big5 for both input and output",
243      NULL, CmdOptCharEnc },
244    { "-shiftjis",
245      "use Shift_JIS for both input and output",
246      NULL, CmdOptCharEnc },
247    { "-language <lang>",
248      "set the two-letter language code <lang> (for future use)",
249      "language: <lang>", CmdOptCharEnc },
250#endif
251    { "-version",
252      "show the version of Tidy",
253      NULL, CmdOptMisc, "-v" },
254    { "-help",
255      "list the command line options",
256      NULL, CmdOptMisc, "-h", "-?" },
257    { "-xml-help",
258      "list the command line options in XML format",
259      NULL, CmdOptMisc },
260    { "-help-config",
261      "list all configuration options",
262      NULL, CmdOptMisc },
263    { "-xml-config",
264      "list all configuration options in XML format",
265      NULL, CmdOptMisc },
266    { "-show-config",
267      "list the current configuration settings",
268      NULL, CmdOptMisc },
269    { NULL, NULL, NULL, CmdOptMisc }
270};
271
272static tmbstr get_option_names( const CmdOptDesc* pos )
273{
274    tmbstr name;
275    uint len = strlen(pos->name1);
276    if (pos->name2)
277        len += 2+strlen(pos->name2);
278    if (pos->name3)
279        len += 2+strlen(pos->name3);
280
281    name = (tmbstr)malloc(len+1);
282    strcpy(name, pos->name1);
283    if (pos->name2)
284    {
285        strcat(name, ", ");
286        strcat(name, pos->name2);
287    }
288    if (pos->name3)
289    {
290        strcat(name, ", ");
291        strcat(name, pos->name3);
292    }
293    return name;
294}
295
/*
 * Return a copy of `name' that is safe to emit as XML character data:
 * '<', '>', '&' and '"' are replaced by "&lt;", "&gt;", "&amp;" and
 * "&quot;".  All other characters are copied unchanged.
 *
 *   name  NUL-terminated text to escape; must not be NULL
 *
 * Returns a malloc'ed string that the caller must free(), or NULL if the
 * allocation fails.
 */
static tmbstr get_escaped_name( ctmbstr name )
{
    tmbstr escpName, out;
    size_t len = 0;
    ctmbstr c;

    /* First pass: size of the escaped text. */
    for(c=name; *c!='\0'; ++c)
        switch(*c)
        {
        case '<':
        case '>':
            len += 4;
            break;
        case '&':
            len += 5;
            break;
        case '"':
            len += 6;
            break;
        default:
            len += 1;
            break;
        }

    escpName = (tmbstr)malloc(len+1);
    if (!escpName)
        return NULL;

    /* Second pass: append through a write pointer so the output is not
       rescanned for every character. */
    out = escpName;
    for(c=name; *c!='\0'; ++c)
    {
        ctmbstr entity = NULL;

        switch(*c)
        {
        case '<':
            entity = "&lt;";
            break;
        case '>':
            entity = "&gt;";
            break;
        case '&':
            entity = "&amp;";
            break;
        case '"':
            entity = "&quot;";
            break;
        default:
            break;
        }

        if (entity)
        {
            size_t n = strlen(entity);
            memcpy(out, entity, n);
            out += n;
        }
        else
            *out++ = *c;
    }
    *out = '\0';

    return escpName;
}
341
342static void print_help_option( void )
343{
344    CmdOptCategory cat = CmdOptCatFIRST;
345    const CmdOptDesc* pos = cmdopt_defs;
346
347    for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat)
348    {
349        size_t len =  strlen(cmdopt_catname[cat].name);
350        printf("%s\n", cmdopt_catname[cat].name );
351        printf("%*.*s\n", (int)len, (int)len, helpul );
352        for( pos=cmdopt_defs; pos->name1; ++pos)
353        {
354            tmbstr name;
355            if (pos->cat != cat)
356                continue;
357            name = get_option_names( pos );
358            print2Columns( helpfmt, 19, 58, name, pos->desc );
359            free(name);
360        }
361        printf("\n");
362    }
363}
364
365static void print_xml_help_option_element( ctmbstr element, ctmbstr name )
366{
367    tmbstr escpName;
368    if (!name)
369        return;
370    printf("  <%s>%s</%s>\n", element, escpName = get_escaped_name(name),
371           element);
372    free(escpName);
373}
374
375static void print_xml_help_option( void )
376{
377    const CmdOptDesc* pos = cmdopt_defs;
378
379    for( pos=cmdopt_defs; pos->name1; ++pos)
380    {
381        printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic );
382        print_xml_help_option_element("name", pos->name1);
383        print_xml_help_option_element("name", pos->name2);
384        print_xml_help_option_element("name", pos->name3);
385        print_xml_help_option_element("description", pos->desc);
386        if (pos->eqconfig)
387            print_xml_help_option_element("eqconfig", pos->eqconfig);
388        else
389            printf("  <eqconfig />\n");
390        printf(" </option>\n");
391    }
392}
393
394static void xml_help( void )
395{
396    printf( "<?xml version=\"1.0\"?>\n"
397            "<cmdline version=\"%s\">\n", tidyReleaseDate());
398    print_xml_help_option();
399    printf( "</cmdline>\n" );
400}
401
402static void help( ctmbstr prog )
403{
404    printf( "%s [option...] [file...] [option...] [file...]\n", prog );
405    printf( "Utility to clean up and pretty print HTML/XHTML/XML\n");
406    printf( "see http://tidy.sourceforge.net/\n");
407    printf( "\n");
408
409#ifdef PLATFORM_NAME
410    printf( "Options for HTML Tidy for %s released on %s:\n",
411             PLATFORM_NAME, tidyReleaseDate() );
412#else
413    printf( "Options for HTML Tidy released on %s:\n", tidyReleaseDate() );
414#endif
415    printf( "\n");
416
417    print_help_option();
418
419    printf( "Use --blah blarg for any configuration option \"blah\" with argument \"blarg\"\n");
420    printf( "\n");
421
422    printf( "Input/Output default to stdin/stdout respectively\n");
423    printf( "Single letter options apart from -f may be combined\n");
424    printf( "as in:  tidy -f errs.txt -imu foo.html\n");
425    printf( "For further info on HTML see http://www.w3.org/MarkUp\n");
426    printf( "\n");
427}
428
429static Bool isAutoBool( TidyOption topt )
430{
431    TidyIterator pos;
432    ctmbstr def;
433
434    if ( tidyOptGetType( topt ) != TidyInteger)
435        return no;
436
437    pos = tidyOptGetPickList( topt );
438    while ( pos )
439    {
440        def = tidyOptGetNextPick( topt, &pos );
441        if (0==strcmp(def,"yes"))
442           return yes;
443    }
444    return no;
445}
446
447static
448ctmbstr ConfigCategoryName( TidyConfigCategory id )
449{
450    switch( id )
451    {
452    case TidyMarkup:
453        return "markup";
454    case TidyDiagnostics:
455        return "diagnostics";
456    case TidyPrettyPrint:
457        return "print";
458    case TidyEncoding:
459        return "encoding";
460    case TidyMiscellaneous:
461        return "misc";
462    }
463    fprintf(stderr, "Fatal error: impossible value for id='%d'.\n", (int)id);
464    assert(0);
465    abort();
466}
467
468/* Description of an option */
469typedef struct {
470    ctmbstr name;  /**< Name */
471    ctmbstr cat;   /**< Category */
472    ctmbstr type;  /**< "String, ... */
473    ctmbstr vals;  /**< Potential values. If NULL, use an external function */
474    ctmbstr def;   /**< default */
475    tmbchar tempdefs[80]; /**< storage for default such as integer */
476    Bool haveVals; /**< if yes, vals is valid */
477} OptionDesc;
478
479typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * );
480
481
482/* Create description "d" related to "opt" */
483static
484void GetOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
485{
486    TidyOptionId optId = tidyOptGetId( topt );
487    TidyOptionType optTyp = tidyOptGetType( topt );
488
489    d->name = tidyOptGetName( topt );
490    d->cat = ConfigCategoryName( tidyOptGetCategory( topt ) );
491    d->vals = NULL;
492    d->def = NULL;
493    d->haveVals = yes;
494
495    /* Handle special cases first.
496     */
497    switch ( optId )
498    {
499    case TidyDuplicateAttrs:
500    case TidyNewline:
501    case TidyAccessibilityCheckLevel:
502        d->type = "enum";
503        d->vals = NULL;
504        d->def =
505            optId==TidyNewline ?
506            "<em>Platform dependent</em>"
507            :tidyOptGetCurrPick( tdoc, optId );
508        break;
509
510    case TidyDoctype:
511        d->type = "DocType";
512        d->vals = NULL;
513        {
514            ctmbstr sdef = NULL;
515            sdef = tidyOptGetCurrPick( tdoc, TidyDoctypeMode );
516            if ( !sdef || *sdef == '*' )
517                sdef = tidyOptGetValue( tdoc, TidyDoctype );
518            d->def = sdef;
519        }
520        break;
521
522    case TidyInlineTags:
523    case TidyBlockTags:
524    case TidyEmptyTags:
525    case TidyPreTags:
526        d->type = "Tag names";
527        d->vals = "tagX, tagY, ...";
528        d->def = NULL;
529        break;
530
531    case TidyCharEncoding:
532    case TidyInCharEncoding:
533    case TidyOutCharEncoding:
534        d->type = "Encoding";
535        d->def = tidyOptGetEncName( tdoc, optId );
536        if (!d->def)
537            d->def = "?";
538        d->vals = NULL;
539        break;
540
541        /* General case will handle remaining */
542    default:
543        switch ( optTyp )
544        {
545        case TidyBoolean:
546            d->type = "Boolean";
547            d->vals = "y/n, yes/no, t/f, true/false, 1/0";
548            d->def = tidyOptGetCurrPick( tdoc, optId );
549            break;
550
551        case TidyInteger:
552            if (isAutoBool(topt))
553            {
554                d->type = "AutoBool";
555                d->vals = "auto, y/n, yes/no, t/f, true/false, 1/0";
556                d->def = tidyOptGetCurrPick( tdoc, optId );
557            }
558            else
559            {
560                uint idef;
561                d->type = "Integer";
562                if ( optId == TidyWrapLen )
563                    d->vals = "0 (no wrapping), 1, 2, ...";
564                else
565                    d->vals = "0, 1, 2, ...";
566
567                idef = tidyOptGetInt( tdoc, optId );
568                sprintf(d->tempdefs, "%u", idef);
569                d->def = d->tempdefs;
570            }
571            break;
572
573        case TidyString:
574            d->type = "String";
575            d->vals = NULL;
576            d->haveVals = no;
577            d->def = tidyOptGetValue( tdoc, optId );
578            break;
579        }
580    }
581}
582
583/* Array holding all options. Contains a trailing sentinel. */
584typedef struct {
585    TidyOption topt[N_TIDY_OPTIONS];
586} AllOption_t;
587
588static
589int cmpOpt(const void* e1_, const void *e2_)
590{
591    const TidyOption* e1 = (const TidyOption*)e1_;
592    const TidyOption* e2 = (const TidyOption*)e2_;
593    return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2));
594}
595
596static
597void getSortedOption( TidyDoc tdoc, AllOption_t *tOption )
598{
599    TidyIterator pos = tidyGetOptionList( tdoc );
600    uint i = 0;
601
602    while ( pos )
603    {
604        TidyOption topt = tidyGetNextOption( tdoc, &pos );
605        tOption->topt[i] = topt;
606        ++i;
607    }
608    tOption->topt[i] = NULL; /* sentinel */
609
610    qsort(tOption->topt,
611          /* Do not sort the sentinel: hence `-1' */
612          sizeof(tOption->topt)/sizeof(tOption->topt[0])-1,
613          sizeof(tOption->topt[0]),
614          cmpOpt);
615}
616
617static void ForEachSortedOption( TidyDoc tdoc, OptionFunc OptionPrint )
618{
619    AllOption_t tOption;
620    const TidyOption *topt;
621
622    getSortedOption( tdoc, &tOption );
623    for( topt = tOption.topt; *topt; ++topt)
624    {
625        OptionDesc d;
626
627        GetOption( tdoc, *topt, &d );
628        (*OptionPrint)( tdoc, *topt, &d );
629    }
630}
631
632static void ForEachOption( TidyDoc tdoc, OptionFunc OptionPrint )
633{
634    TidyIterator pos = tidyGetOptionList( tdoc );
635
636    while ( pos )
637    {
638        TidyOption topt = tidyGetNextOption( tdoc, &pos );
639        OptionDesc d;
640
641        GetOption( tdoc, topt, &d );
642        (*OptionPrint)( tdoc, topt, &d );
643    }
644}
645
646static
647void PrintAllowedValuesFromPick( TidyOption topt )
648{
649    TidyIterator pos = tidyOptGetPickList( topt );
650    Bool first = yes;
651    ctmbstr def;
652    while ( pos )
653    {
654        if (first)
655            first = no;
656        else
657            printf(", ");
658        def = tidyOptGetNextPick( topt, &pos );
659        printf("%s", def);
660    }
661}
662
663static
664void PrintAllowedValues( TidyOption topt, const OptionDesc *d )
665{
666    if (d->vals)
667        printf( "%s", d->vals );
668    else
669        PrintAllowedValuesFromPick( topt );
670}
671
672static
673void printXMLDescription( TidyDoc tdoc, TidyOption topt )
674{
675    ctmbstr doc = tidyOptGetDoc( tdoc, topt );
676
677    if (doc)
678        printf("  <description>%s</description>\n", doc);
679    else
680    {
681        printf("  <description />\n");
682        fprintf(stderr, "Warning: option `%s' is not documented.\n",
683                tidyOptGetName( topt ));
684    }
685}
686
687static
688void printXMLCrossRef( TidyDoc tdoc, TidyOption topt )
689{
690    TidyOption optLinked;
691    TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt);
692    while( pos )
693    {
694        optLinked = tidyOptGetNextDocLinks(tdoc, &pos );
695        printf("  <seealso>%s</seealso>\n",tidyOptGetName(optLinked));
696    }
697}
698
699static
700void printXMLOption( TidyDoc tdoc, TidyOption topt, OptionDesc *d )
701{
702    if ( tidyOptIsReadOnly(topt) )
703        return;
704
705    printf( " <option class=\"%s\">\n", d->cat );
706    printf  ("  <name>%s</name>\n",d->name);
707    printf  ("  <type>%s</type>\n",d->type);
708    if (d->def)
709        printf("  <default>%s</default>\n",d->def);
710    else
711        printf("  <default />\n");
712    if (d->haveVals)
713    {
714        printf("  <example>");
715        PrintAllowedValues( topt, d );
716        printf("</example>\n");
717    }
718    else
719    {
720        printf("  <example />\n");
721    }
722    printXMLDescription( tdoc, topt );
723    printXMLCrossRef( tdoc, topt );
724    printf( " </option>\n" );
725}
726
727static void XMLoptionhelp( TidyDoc tdoc )
728{
729    printf( "<?xml version=\"1.0\"?>\n"
730            "<config version=\"%s\">\n", tidyReleaseDate());
731    ForEachOption( tdoc, printXMLOption );
732    printf( "</config>\n" );
733}
734
735static
736tmbstr GetAllowedValuesFromPick( TidyOption topt )
737{
738    TidyIterator pos;
739    Bool first;
740    ctmbstr def;
741    uint len = 0;
742    tmbstr val;
743
744    pos = tidyOptGetPickList( topt );
745    first = yes;
746    while ( pos )
747    {
748        if (first)
749            first = no;
750        else
751            len += 2;
752        def = tidyOptGetNextPick( topt, &pos );
753        len += strlen(def);
754    }
755    val = (tmbstr)malloc(len+1);
756    val[0] = '\0';
757    pos = tidyOptGetPickList( topt );
758    first = yes;
759    while ( pos )
760    {
761        if (first)
762            first = no;
763        else
764            strcat(val, ", ");
765        def = tidyOptGetNextPick( topt, &pos );
766        strcat(val, def);
767    }
768    return val;
769}
770
771static
772tmbstr GetAllowedValues( TidyOption topt, const OptionDesc *d )
773{
774    if (d->vals)
775    {
776        tmbstr val = (tmbstr)malloc(1+strlen(d->vals));
777        strcpy(val, d->vals);
778        return val;
779    }
780    else
781        return GetAllowedValuesFromPick( topt );
782}
783
784static
785void printOption( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
786                  OptionDesc *d )
787{
788    if ( tidyOptIsReadOnly(topt) )
789        return;
790
791    if ( *d->name || *d->type )
792    {
793        ctmbstr pval = d->vals;
794        tmbstr val = NULL;
795        if (!d->haveVals)
796        {
797            pval = "-";
798        }
799        else if (pval == NULL)
800        {
801            val = GetAllowedValues( topt, d);
802            pval = val;
803        }
804        print3Columns( fmt, 27, 9, 40, d->name, d->type, pval );
805        if (val)
806            free(val);
807    }
808}
809
810static void optionhelp( TidyDoc tdoc )
811{
812    printf( "\nHTML Tidy Configuration Settings\n\n" );
813    printf( "Within a file, use the form:\n\n" );
814    printf( "wrap: 72\n" );
815    printf( "indent: no\n\n" );
816    printf( "When specified on the command line, use the form:\n\n" );
817    printf( "--wrap 72 --indent no\n\n");
818
819    printf( fmt, "Name", "Type", "Allowable values" );
820    printf( fmt, ul, ul, ul );
821
822    ForEachSortedOption( tdoc, printOption );
823}
824
825static
826void printOptionValues( TidyDoc ARG_UNUSED(tdoc), TidyOption topt,
827                        OptionDesc *d )
828{
829    TidyOptionId optId = tidyOptGetId( topt );
830    ctmbstr ro = tidyOptIsReadOnly( topt ) ? "*" : "" ;
831
832    switch ( optId )
833    {
834    case TidyInlineTags:
835    case TidyBlockTags:
836    case TidyEmptyTags:
837    case TidyPreTags:
838        {
839            TidyIterator pos = tidyOptGetDeclTagList( tdoc );
840            while ( pos )
841            {
842                d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
843                if ( pos )
844                {
845                    if ( *d->name )
846                        printf( valfmt, d->name, d->type, ro, d->def );
847                    else
848                        printf( fmt, d->name, d->type, d->def );
849                    d->name = "";
850                    d->type = "";
851                }
852            }
853        }
854        break;
855    case TidyNewline:
856        d->def = tidyOptGetCurrPick( tdoc, optId );
857        break;
858    }
859
860    /* fix for http://tidy.sf.net/bug/873921 */
861    if ( *d->name || *d->type || (d->def && *d->def) )
862    {
863        if ( ! d->def )
864            d->def = "";
865        if ( *d->name )
866            printf( valfmt, d->name, d->type, ro, d->def );
867        else
868            printf( fmt, d->name, d->type, d->def );
869    }
870}
871
872static void optionvalues( TidyDoc tdoc )
873{
874    printf( "\nConfiguration File Settings:\n\n" );
875    printf( fmt, "Name", "Type", "Current Value" );
876    printf( fmt, ul, ul, ul );
877
878    ForEachSortedOption( tdoc, printOptionValues );
879
880    printf( "\n\nValues marked with an *asterisk are calculated \n"
881            "internally by HTML Tidy\n\n" );
882}
883
884static void version( void )
885{
886#ifdef PLATFORM_NAME
887    printf( "HTML Tidy for %s released on %s\n",
888             PLATFORM_NAME, tidyReleaseDate() );
889#else
890    printf( "HTML Tidy released on %s\n", tidyReleaseDate() );
891#endif
892}
893
894static void unknownOption( uint c )
895{
896    fprintf( errout, "HTML Tidy: unknown option: %c\n", (char)c );
897}
898
899int main( int argc, char** argv )
900{
901    ctmbstr prog = argv[0];
902    ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
903    TidyDoc tdoc = tidyCreate();
904    int status = 0;
905
906    uint contentErrors = 0;
907    uint contentWarnings = 0;
908    uint accessWarnings = 0;
909
910    errout = stderr;  /* initialize to stderr */
911    status = 0;
912
913#ifdef CONFIG_FILE
914    if ( tidyFileExists(CONFIG_FILE) )
915    {
916        status = tidyLoadConfig( tdoc, CONFIG_FILE );
917        if ( status != 0 )
918            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", CONFIG_FILE, status);
919    }
920#endif /* CONFIG_FILE */
921
922    /* look for env var "HTML_TIDY" */
923    /* then for ~/.tidyrc (on platforms defining $HOME) */
924
925    if ( cfgfil = getenv("HTML_TIDY") )
926    {
927        status = tidyLoadConfig( tdoc, cfgfil );
928        if ( status != 0 )
929            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", cfgfil, status);
930    }
931#ifdef USER_CONFIG_FILE
932    else if ( tidyFileExists(USER_CONFIG_FILE) )
933    {
934        status = tidyLoadConfig( tdoc, USER_CONFIG_FILE );
935        if ( status != 0 )
936            fprintf(errout, "Loading config file \"%s\" failed, err = %d\n", USER_CONFIG_FILE, status);
937    }
938#endif /* USER_CONFIG_FILE */
939
940    /* read command line */
941    while ( argc > 0 )
942    {
943        if (argc > 1 && argv[1][0] == '-')
944        {
945            /* support -foo and --foo */
946            ctmbstr arg = argv[1] + 1;
947
948            if ( strcasecmp(arg, "xml") == 0)
949                tidyOptSetBool( tdoc, TidyXmlTags, yes );
950
951            else if ( strcasecmp(arg,   "asxml") == 0 ||
952                      strcasecmp(arg, "asxhtml") == 0 )
953            {
954                tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
955            }
956            else if ( strcasecmp(arg,   "ashtml") == 0 )
957                tidyOptSetBool( tdoc, TidyHtmlOut, yes );
958
959            else if ( strcasecmp(arg, "indent") == 0 )
960            {
961                tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
962                if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
963                    tidyOptResetToDefault( tdoc, TidyIndentSpaces );
964            }
965            else if ( strcasecmp(arg, "omit") == 0 )
966                tidyOptSetBool( tdoc, TidyHideEndTags, yes );
967
968            else if ( strcasecmp(arg, "upper") == 0 )
969                tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
970
971            else if ( strcasecmp(arg, "clean") == 0 )
972                tidyOptSetBool( tdoc, TidyMakeClean, yes );
973
974            else if ( strcasecmp(arg, "bare") == 0 )
975                tidyOptSetBool( tdoc, TidyMakeBare, yes );
976
977            else if ( strcasecmp(arg, "raw") == 0      ||
978                      strcasecmp(arg, "ascii") == 0    ||
979                      strcasecmp(arg, "latin0") == 0   ||
980                      strcasecmp(arg, "latin1") == 0   ||
981                      strcasecmp(arg, "utf8") == 0     ||
982#ifndef NO_NATIVE_ISO2022_SUPPORT
983                      strcasecmp(arg, "iso2022") == 0  ||
984#endif
985#if SUPPORT_UTF16_ENCODINGS
986                      strcasecmp(arg, "utf16le") == 0  ||
987                      strcasecmp(arg, "utf16be") == 0  ||
988                      strcasecmp(arg, "utf16") == 0    ||
989#endif
990#if SUPPORT_ASIAN_ENCODINGS
991                      strcasecmp(arg, "shiftjis") == 0 ||
992                      strcasecmp(arg, "big5") == 0     ||
993#endif
994                      strcasecmp(arg, "mac") == 0      ||
995                      strcasecmp(arg, "win1252") == 0  ||
996                      strcasecmp(arg, "ibm858") == 0 )
997            {
998                tidySetCharEncoding( tdoc, arg );
999            }
1000            else if ( strcasecmp(arg, "numeric") == 0 )
1001                tidyOptSetBool( tdoc, TidyNumEntities, yes );
1002
1003            else if ( strcasecmp(arg, "modify") == 0 ||
1004                      strcasecmp(arg, "change") == 0 ||  /* obsolete */
1005                      strcasecmp(arg, "update") == 0 )   /* obsolete */
1006            {
1007                tidyOptSetBool( tdoc, TidyWriteBack, yes );
1008            }
1009            else if ( strcasecmp(arg, "errors") == 0 )
1010                tidyOptSetBool( tdoc, TidyShowMarkup, no );
1011
1012            else if ( strcasecmp(arg, "quiet") == 0 )
1013                tidyOptSetBool( tdoc, TidyQuiet, yes );
1014
1015            else if ( strcasecmp(arg, "help") == 0 ||
1016                      strcasecmp(arg,    "h") == 0 || *arg == '?' )
1017            {
1018                help( prog );
1019                tidyRelease( tdoc );
1020                return 0; /* success */
1021            }
1022            else if ( strcasecmp(arg, "xml-help") == 0)
1023            {
1024                xml_help( );
1025                tidyRelease( tdoc );
1026                return 0; /* success */
1027            }
1028            else if ( strcasecmp(arg, "help-config") == 0 )
1029            {
1030                optionhelp( tdoc );
1031                tidyRelease( tdoc );
1032                return 0; /* success */
1033            }
1034            else if ( strcasecmp(arg, "xml-config") == 0 )
1035            {
1036                XMLoptionhelp( tdoc );
1037                tidyRelease( tdoc );
1038                return 0; /* success */
1039            }
1040            else if ( strcasecmp(arg, "show-config") == 0 )
1041            {
1042                optionvalues( tdoc );
1043                tidyRelease( tdoc );
1044                return 0; /* success */
1045            }
1046            else if ( strcasecmp(arg, "config") == 0 )
1047            {
1048                if ( argc >= 3 )
1049                {
1050                    ctmbstr post;
1051
1052                    tidyLoadConfig( tdoc, argv[2] );
1053
1054                    /* Set new error output stream if setting changed */
1055                    post = tidyOptGetValue( tdoc, TidyErrFile );
1056                    if ( post && (!errfil || !samefile(errfil, post)) )
1057                    {
1058                        errfil = post;
1059                        errout = tidySetErrorFile( tdoc, post );
1060                    }
1061
1062                    --argc;
1063                    ++argv;
1064                }
1065            }
1066
1067#if SUPPORT_ASIAN_ENCODINGS
1068            else if ( strcasecmp(arg, "language") == 0 ||
1069                      strcasecmp(arg,     "lang") == 0 )
1070            {
1071                if ( argc >= 3 )
1072                {
1073                    tidyOptSetValue( tdoc, TidyLanguage, argv[2] );
1074                    --argc;
1075                    ++argv;
1076                }
1077            }
1078#endif
1079
1080            else if ( strcasecmp(arg, "output") == 0 ||
1081                      strcasecmp(arg, "-output-file") == 0 ||
1082                      strcasecmp(arg, "o") == 0 )
1083            {
1084                if ( argc >= 3 )
1085                {
1086                    tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
1087                    --argc;
1088                    ++argv;
1089                }
1090            }
1091            else if ( strcasecmp(arg,  "file") == 0 ||
1092                      strcasecmp(arg, "-file") == 0 ||
1093                      strcasecmp(arg,     "f") == 0 )
1094            {
1095                if ( argc >= 3 )
1096                {
1097                    errfil = argv[2];
1098                    errout = tidySetErrorFile( tdoc, errfil );
1099                    --argc;
1100                    ++argv;
1101                }
1102            }
1103            else if ( strcasecmp(arg,  "wrap") == 0 ||
1104                      strcasecmp(arg, "-wrap") == 0 ||
1105                      strcasecmp(arg,     "w") == 0 )
1106            {
1107                if ( argc >= 3 )
1108                {
1109                    uint wraplen = 0;
1110                    int nfields = sscanf( argv[2], "%u", &wraplen );
1111                    tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
1112                    if (nfields > 0)
1113                    {
1114                        --argc;
1115                        ++argv;
1116                    }
1117                }
1118            }
1119            else if ( strcasecmp(arg,  "version") == 0 ||
1120                      strcasecmp(arg, "-version") == 0 ||
1121                      strcasecmp(arg,        "v") == 0 )
1122            {
1123                version();
1124                tidyRelease( tdoc );
1125                return 0;  /* success */
1126
1127            }
1128            else if ( strncmp(argv[1], "--", 2 ) == 0)
1129            {
1130                if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
1131                {
1132                    /* Set new error output stream if setting changed */
1133                    ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
1134                    if ( post && (!errfil || !samefile(errfil, post)) )
1135                    {
1136                        errfil = post;
1137                        errout = tidySetErrorFile( tdoc, post );
1138                    }
1139
1140                    ++argv;
1141                    --argc;
1142                }
1143            }
1144
1145#if SUPPORT_ACCESSIBILITY_CHECKS
1146            else if ( strcasecmp(arg, "access") == 0 )
1147            {
1148                if ( argc >= 3 )
1149                {
1150                    uint acclvl = 0;
1151                    int nfields = sscanf( argv[2], "%u", &acclvl );
1152                    tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
1153                    if (nfields > 0)
1154                    {
1155                        --argc;
1156                        ++argv;
1157                    }
1158                }
1159            }
1160#endif
1161
1162            else
1163            {
1164                uint c;
1165                ctmbstr s = argv[1];
1166
1167                while ( c = *++s )
1168                {
1169                    switch ( c )
1170                    {
1171                    case 'i':
1172                        tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
1173                        if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
1174                            tidyOptResetToDefault( tdoc, TidyIndentSpaces );
1175                        break;
1176
1177                    /* Usurp -o for output file.  Anyone hiding end tags?
1178                    case 'o':
1179                        tidyOptSetBool( tdoc, TidyHideEndTags, yes );
1180                        break;
1181                    */
1182
1183                    case 'u':
1184                        tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
1185                        break;
1186
1187                    case 'c':
1188                        tidyOptSetBool( tdoc, TidyMakeClean, yes );
1189                        break;
1190
1191                    case 'b':
1192                        tidyOptSetBool( tdoc, TidyMakeBare, yes );
1193                        break;
1194
1195                    case 'n':
1196                        tidyOptSetBool( tdoc, TidyNumEntities, yes );
1197                        break;
1198
1199                    case 'm':
1200                        tidyOptSetBool( tdoc, TidyWriteBack, yes );
1201                        break;
1202
1203                    case 'e':
1204                        tidyOptSetBool( tdoc, TidyShowMarkup, no );
1205                        break;
1206
1207                    case 'q':
1208                        tidyOptSetBool( tdoc, TidyQuiet, yes );
1209                        break;
1210
1211                    default:
1212                        unknownOption( c );
1213                        break;
1214                    }
1215                }
1216            }
1217
1218            --argc;
1219            ++argv;
1220            continue;
1221        }
1222
1223        if ( argc > 1 )
1224        {
1225            htmlfil = argv[1];
1226            if ( tidyOptGetBool(tdoc, TidyEmacs) )
1227                tidyOptSetValue( tdoc, TidyEmacsFile, htmlfil );
1228            status = tidyParseFile( tdoc, htmlfil );
1229        }
1230        else
1231        {
1232            htmlfil = "stdin";
1233            status = tidyParseStdin( tdoc );
1234        }
1235
1236        if ( status >= 0 )
1237            status = tidyCleanAndRepair( tdoc );
1238
1239        if ( status >= 0 )
1240            status = tidyRunDiagnostics( tdoc );
1241
1242        if ( status > 1 ) /* If errors, do we want to force output? */
1243            status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );
1244
1245        if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
1246        {
1247            if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
1248                status = tidySaveFile( tdoc, htmlfil );
1249            else
1250            {
1251                ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
1252                if ( outfil )
1253                    status = tidySaveFile( tdoc, outfil );
1254                else
1255                    status = tidySaveStdout( tdoc );
1256            }
1257        }
1258
1259        contentErrors   += tidyErrorCount( tdoc );
1260        contentWarnings += tidyWarningCount( tdoc );
1261        accessWarnings  += tidyAccessWarningCount( tdoc );
1262
1263        --argc;
1264        ++argv;
1265
1266        if ( argc <= 1 )
1267            break;
1268    }
1269
1270    if (!tidyOptGetBool(tdoc, TidyQuiet) &&
1271        errout == stderr && !contentErrors)
1272        fprintf(errout, "\n");
1273
1274    if (contentErrors + contentWarnings > 0 &&
1275         !tidyOptGetBool(tdoc, TidyQuiet))
1276        tidyErrorSummary(tdoc);
1277
1278    if (!tidyOptGetBool(tdoc, TidyQuiet))
1279        tidyGeneralInfo(tdoc);
1280
1281    /* called to free hash tables etc. */
1282    tidyRelease( tdoc );
1283
1284    /* return status can be used by scripts */
1285    if ( contentErrors > 0 )
1286        return 2;
1287
1288    if ( contentWarnings > 0 )
1289        return 1;
1290
1291    /* 0 signifies all is ok */
1292    return 0;
1293}
1294
1295/*
1296 * local variables:
1297 * mode: c
1298 * indent-tabs-mode: nil
1299 * c-basic-offset: 4
1300 * eval: (c-set-offset 'substatement-open 0)
1301 * end:
1302 */
1303