1/*
2 * "$Id: mantohtml.c 11093 2013-07-03 20:48:42Z msweet $"
3 *
4 *   Man page to HTML conversion program.
5 *
6 *   Copyright 2007-2010 by Apple Inc.
7 *   Copyright 2004-2006 by Easy Software Products.
8 *
9 *   These coded instructions, statements, and computer programs are the
10 *   property of Apple Inc. and are protected by Federal copyright
11 *   law.  Distribution and use rights are outlined in the file "LICENSE.txt"
12 *   which should have been included with this file.  If this file is
13 *   missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * Contents:
16 *
17 *   main()        - Convert a man page to HTML.
18 *   putc_entity() - Put a single character, using entities as needed.
19 *   strmove()     - Move characters within a string.
20 */
21
22/*
23 * Include necessary headers.
24 */
25
26#include <cups/string-private.h>
27#include <unistd.h>
28
29
30/*
31 * Local functions...
32 */
33
34static void	putc_entity(int ch, FILE *fp);
35static void	strmove(char *d, const char *s);
36
37
/*
 * 'main()' - Convert a man page to HTML.
 *
 * Usage: mantohtml [filename.man [filename.html]]
 *
 * The man page source is read from the first argument (stdin when omitted)
 * and the HTML is written to the second argument (stdout when omitted).
 * Man page commands that appear before the first ".TH" line are ignored.
 *
 * Returns 0 on success and 1 on a usage error, when a file cannot be
 * opened, or when the output file cannot be closed cleanly.
 */

int                                     /* O - Exit status */
main(int  argc,                         /* I - Number of command-line args */
     char *argv[])                      /* I - Command-line arguments */
{
  FILE          *infile,                /* Input file */
                *outfile;               /* Output file */
  char          line[1024],             /* Line from file */
                *lineptr,               /* Pointer into line */
                *endptr,                /* Pointer to end of current URL/link */
                endchar,                /* Character temporarily replaced by nul */
                *paren,                 /* Pointer to parenthesis */
                name[1024];             /* Man page name */
  size_t        linelen;                /* Length of the current line */
  int           section,                /* Man page section, -1 until .TH seen */
                pre,                    /* Preformatted */
                font,                   /* Current font (index into *_fonts) */
                blist,                  /* In a bullet list? */
                list,                   /* 0 = no list, 1 = in <dt>, 2 = in <dd> */
                linenum;                /* Current line number */
  const char    *post;                  /* Text to add after the current line */
  static const char                     /* Start/end tags for fonts */
        * const start_fonts[] = { "", "<b>", "<i>" },
        * const end_fonts[] = { "", "</b>", "</i>" };


 /*
  * Check arguments...
  */

  if (argc > 3)
  {
    fputs("Usage: mantohtml [filename.man [filename.html]]\n", stderr);
    return (1);
  }

 /*
  * Open files as needed...
  */

  if (argc > 1)
  {
    if ((infile = fopen(argv[1], "r")) == NULL)
    {
      perror(argv[1]);
      return (1);
    }
  }
  else
    infile = stdin;

  if (argc > 2)
  {
    if ((outfile = fopen(argv[2], "w")) == NULL)
    {
      perror(argv[2]);
      fclose(infile);
      return (1);
    }
  }
  else
    outfile = stdout;

 /*
  * Read from input and write the output...
  */

  fputs("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
        "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
        "<html>\n"
        "<!-- SECTION: Man Pages -->\n"
        "<head>\n"
        "\t<link rel=\"stylesheet\" type=\"text/css\" "
        "href=\"../cups-printable.css\">\n", outfile);

  blist   = 0;
  font    = 0;
  list    = 0;
  linenum = 0;
  pre     = 0;
  post    = NULL;
  section = -1;

  while (fgets(line, sizeof(line), infile))
  {
    linenum ++;

    if (line[0] == '.')
    {
     /*
      * Strip whitespace between the '.' and the command name...
      */

      while (line[1] == ' ' || line[1] == '\t')
        strmove(line + 1, line + 2);

     /*
      * Process man page commands...
      */

      if (!strncmp(line, ".TH ", 4) && section < 0)
      {
       /*
        * Grab man page title...  The name starts out empty and the field
        * width matches the buffer so a malformed .TH line can neither
        * overflow "name" nor cause it to be printed uninitialized.
        */

        name[0] = '\0';

        if (sscanf(line + 4, "%1023s%d", name, &section) < 2)
          fprintf(stderr,
                  "mantohtml: Unable to parse title and section on line %d!\n",
                  linenum);

        fprintf(outfile,
                "\t<title>%s(%d)</title>\n"
                "</head>\n"
                "<body>\n"
                "<h1 class=\"title\">%s(%d)</h1>\n"
                "%s",
                name, section, name, section, start_fonts[font]);
      }
      else if (section < 0)
        continue;
      else if (!strncmp(line, ".SH ", 4) || !strncmp(line, ".SS ", 4))
      {
       /*
        * Grab heading...
        */

        int first = 1;                  /* First letter of a word? */

        fputs(end_fonts[font], outfile);

        if (blist)
        {
          fputs("</li>\n</ul>\n", outfile);
          blist = 0;
        }

        if (list)
        {
          if (list == 1)
            fputs("</dt>\n", outfile);
          else
            fputs("</dd>\n", outfile);

          fputs("</dl>\n", outfile);
          list = 0;
        }

       /*
        * Strip the LF, but only when there is one - the last line of a
        * file or an over-long line has no trailing newline...
        */

        if ((lineptr = strchr(line, '\n')) != NULL)
          *lineptr = '\0';

        if (line[2] == 'H')
          fputs("<h2 class=\"title\"><a name=\"", outfile);
        else
          fputs("<h3><a name=\"", outfile);

       /*
        * Anchor name: drop quotes and map spaces to underscores...
        */

        for (lineptr = line + 4; *lineptr; lineptr ++)
        {
          if (*lineptr == '\"')
            continue;
          else if (*lineptr == ' ')
            putc_entity('_', outfile);
          else
            putc_entity(*lineptr, outfile);
        }

        fputs("\">", outfile);

       /*
        * Heading text: keep the first letter of each word, lowercase
        * the rest...
        */

        for (lineptr = line + 4; *lineptr; lineptr ++)
        {
          if (*lineptr == '\"')
            continue;
          else if (*lineptr == ' ')
          {
            putc_entity(' ', outfile);

            first = 1;
          }
          else
          {
            if (first)
              putc_entity(*lineptr, outfile);
            else
              putc_entity(tolower(*lineptr & 255), outfile);

            first = 0;
          }
        }

        if (line[2] == 'H')
          fprintf(outfile, "</a></h2>\n%s", start_fonts[font]);
        else
          fprintf(outfile, "</a></h3>\n%s", start_fonts[font]);
      }
      else if (!strncmp(line, ".LP", 3) || !strncmp(line, ".PP", 3))
      {
       /*
        * New paragraph...
        */

        fputs(end_fonts[font], outfile);

        if (blist)
        {
          fputs("</li>\n</ul>\n", outfile);
          blist = 0;
        }

        if (list)
        {
          if (list == 1)
            fputs("</dt>\n", outfile);
          else
            fputs("</dd>\n", outfile);

          fputs("</dl>\n", outfile);
          list = 0;
        }

        fputs("<p>", outfile);
        font = 0;
      }
      else if (!strncmp(line, ".TP ", 4))
      {
       /*
        * Grab list...
        */

        fputs(end_fonts[font], outfile);

        if (blist)
        {
          fputs("</li>\n</ul>\n", outfile);
          blist = 0;
        }

        if (!list)
          fputs("<dl>\n", outfile);
        else if (list == 1)
          fputs("</dt>\n", outfile);
        else
          fputs("</dd>\n", outfile);

        fputs("<dt>", outfile);
        list = 1;
        font = 0;
      }
      else if (!strncmp(line, ".br", 3))
      {
       /*
        * Grab line break...
        */

        if (list == 1)
        {
          fputs("</dt>\n<dd>", outfile);
          list = 2;
        }
        else if (list)
          fputs("</dd>\n<dd>", outfile);
        else
          fputs("<br>\n", outfile);
      }
      else if (!strncmp(line, ".de ", 4))
      {
       /*
        * Define macro - ignore everything up to the closing ".."...
        */

        while (fgets(line, sizeof(line), infile))
        {
          linenum ++;

          if (!strncmp(line, "..", 2))
            break;
        }
      }
      else if (!strncmp(line, ".RS", 3))
      {
       /*
        * Indent...
        */

        fputs("<div style='margin-left: 3em;'>\n", outfile);
      }
      else if (!strncmp(line, ".RE", 3))
      {
       /*
        * Unindent...
        */

        fputs("</div>\n", outfile);
      }
      else if (!strncmp(line, ".ds ", 4) || !strncmp(line, ".rm ", 4) ||
               !strncmp(line, ".tr ", 4) || !strncmp(line, ".hy ", 4) ||
               !strncmp(line, ".IX ", 4) || !strncmp(line, ".PD", 3) ||
               !strncmp(line, ".Sp", 3))
      {
       /*
        * Ignore unused commands...
        */
      }
      else if (!strncmp(line, ".Vb", 3) || !strncmp(line, ".nf", 3))
      {
       /*
        * Start preformatted...
        */

        pre = 1;
        fputs("<pre>\n", outfile);
      }
      else if (!strncmp(line, ".Ve", 3) || !strncmp(line, ".fi", 3))
      {
       /*
        * End preformatted...
        */

        if (pre)
        {
          pre = 0;
          fputs("</pre>\n", outfile);
        }
      }
      else if (!strncmp(line, ".IP \\(bu", 8))
      {
       /*
        * Bullet list...
        */

        if (blist)
          fputs("</li>\n", outfile);
        else
        {
          fputs("<ul>\n", outfile);
          blist = 1;
        }

        fputs("<li>", outfile);
      }
      else if (!strncmp(line, ".IP ", 4))
      {
       /*
        * Indented paragraph...
        */

        if (blist)
        {
          fputs("</li>\n</ul>\n", outfile);
          blist = 0;
        }

        fputs("<p style='margin-left: 3em;'>", outfile);

        for (lineptr = line + 4; isspace(*lineptr & 255); lineptr ++);

       /*
        * Keep only the first argument: either the quoted string or the
        * first space-delimited word, moved to the start of the buffer...
        */

        if (*lineptr == '\"')
        {
          strmove(line, lineptr + 1);

          if ((lineptr = strchr(line, '\"')) != NULL)
            *lineptr = '\0';
        }
        else
        {
          strmove(line, lineptr);

          if ((lineptr = strchr(line, ' ')) != NULL)
            *lineptr = '\0';
        }

       /*
        * Process the text as if it was in-line...
        */

        post = "\n<br>\n<br>";
        goto process_text;
      }
      else if (!strncmp(line, ".\\}", 3))
      {
       /*
        * Ignore close block...
        */
      }
      else if (!strncmp(line, ".ie", 3) || !strncmp(line, ".if", 3) ||
               !strncmp(line, ".el", 3))
      {
       /*
        * If/else - ignore...
        */

        if (strchr(line, '{') != NULL)
        {
         /*
          * Skip whole block...
          */

          while (fgets(line, sizeof(line), infile))
          {
            linenum ++;

            if (strchr(line, '}') != NULL)
              break;
          }
        }
      }
      else if (!strncmp(line, ".B ", 3))
      {
       /*
        * Grab bold text, escaping HTML markup characters like the
        * in-line text path does...
        */

        fprintf(outfile, "%s<b>", end_fonts[font]);

        for (lineptr = line + 3; *lineptr; lineptr ++)
          putc_entity(*lineptr, outfile);

        fprintf(outfile, "</b>%s", start_fonts[font]);
      }
      else if (!strncmp(line, ".I ", 3))
      {
       /*
        * Grab italic text, escaping HTML markup characters like the
        * in-line text path does...
        */

        fprintf(outfile, "%s<i>", end_fonts[font]);

        for (lineptr = line + 3; *lineptr; lineptr ++)
          putc_entity(*lineptr, outfile);

        fprintf(outfile, "</i>%s", start_fonts[font]);
      }
      else if (strncmp(line, ".\\\"", 3))
      {
       /*
        * Unknown (anything that is not a .\" comment)...
        */

        if ((lineptr = strchr(line, ' ')) != NULL)
          *lineptr = '\0';
        else if ((lineptr = strchr(line, '\n')) != NULL)
          *lineptr = '\0';

        fprintf(stderr, "mantohtml: Unknown man page command \'%s\' on line %d!\n",
                line, linenum);
      }

     /*
      * Skip continuation lines...  A line continues when the character
      * before the final one (normally the newline) is a backslash; the
      * length is checked first so no pointer before "line" is formed.
      */

      linelen = strlen(line);

      if (linelen >= 2 && line[linelen - 2] == '\\')
      {
        while (fgets(line, sizeof(line), infile))
        {
          linenum ++;
          linelen = strlen(line);

          if (linelen < 2 || line[linelen - 2] != '\\')
            break;
        }
      }
    }
    else
    {
     /*
      * Process man page text...
      */

process_text:

      for (lineptr = line; *lineptr; lineptr ++)
      {
        if (!strncmp(lineptr, "http://", 7))
        {
         /*
          * Embed URL...
          */

          for (endptr = lineptr + 7;
               *endptr && !isspace(*endptr & 255);
               endptr ++);

          endchar = *endptr;
          *endptr = '\0';

          fprintf(outfile, "<a href='%s'>%s</a>", lineptr, lineptr);
          *endptr = endchar;
          lineptr = endptr - 1;
        }
        else if (!strncmp(lineptr, "\\fI", 3) &&
                 (endptr = strstr(lineptr, "\\fR")) != NULL &&
                 (paren = strchr(lineptr, '(')) != NULL &&
                 paren < endptr)
        {
         /*
          * Link to man page?
          */

          char  manfile[1024],          /* Man page filename */
                manurl[1024];           /* Man page URL */


         /*
          * See if the man file is available locally...
          */

          lineptr += 3;
          endchar = *paren;
          *paren  = '\0';

          snprintf(manfile, sizeof(manfile), "%s.man", lineptr);
          snprintf(manurl, sizeof(manurl), "man-%s.html?TOPIC=Man+Pages",
                   lineptr);

          *paren  = endchar;
          endchar = *endptr;
          *endptr = '\0';

          if (access(manfile, F_OK))
          {
           /*
            * Not a local man page, just do it italic...
            */

            fputs("<i>", outfile);
            while (*lineptr)
              putc_entity(*lineptr++, outfile);
            fputs("</i>", outfile);
          }
          else
          {
           /*
            * Local man page, do a link...
            */

            fprintf(outfile, "<a href='%s'>", manurl);
            while (*lineptr)
              putc_entity(*lineptr++, outfile);
            fputs("</a>", outfile);
          }

         /*
          * Resume after the "\fR" (the loop increment skips the 'R')...
          */

          *endptr = endchar;
          lineptr = endptr + 2;
        }
        else if (*lineptr == '\\')
        {
          lineptr ++;
          if (!*lineptr)
            break;
          else if (isdigit(lineptr[0] & 255) && isdigit(lineptr[1] & 255) &&
                   isdigit(lineptr[2] & 255))
          {
           /*
            * Three-digit octal character code...
            */

            fprintf(outfile, "&#%d;", ((lineptr[0] - '0') * 8 +
                                       lineptr[1] - '0') * 8 +
                                      lineptr[2] - '0');
            lineptr += 2;
          }
          else if (*lineptr == '&')
            continue;
          else if (*lineptr == 's')
          {
           /*
            * Skip a size change and its numeric argument...
            */

            while (lineptr[1] == '-' || isdigit(lineptr[1] & 255))
              lineptr ++;
          }
          else if (*lineptr == '*')
          {
           /*
            * Skip up to two characters after the '*', but never step
            * over the end of the string...
            */

            if (lineptr[1])
              lineptr ++;

            if (lineptr[1])
              lineptr ++;
          }
          else if (*lineptr != 'f')
            putc_entity(*lineptr, outfile);
          else
          {
           /*
            * Font change: close the current font and open the new one...
            */

            lineptr ++;
            if (!*lineptr)
              break;
            else
            {
              fputs(end_fonts[font], outfile);

              switch (*lineptr)
              {
                default : /* Regular */
                    font = 0;
                    break;
                case 'B' : /* Bold */
                case 'b' :
                    font = 1;
                    break;
                case 'I' : /* Italic */
                case 'i' :
                    font = 2;
                    break;
              }

              fputs(start_fonts[font], outfile);
            }
          }
        }
        else
          putc_entity(*lineptr, outfile);
      }

      if (post)
      {
        fputs(post, outfile);
        post = NULL;
      }

     /*
      * The first text line after .TP is the term; what follows is the
      * definition...
      */

      if (list == 1)
      {
        fputs("</dt>\n<dd>", outfile);
        list = 2;
      }
    }
  }

 /*
  * Close any open font, lists, and the document...
  */

  fprintf(outfile, "%s\n", end_fonts[font]);

  if (blist)
  {
    fputs("</li>\n</ul>\n", outfile);
  }

  if (list)
  {
    if (list == 1)
      fputs("</dt>\n", outfile);
    else
      fputs("</dd>\n", outfile);

    fputs("</dl>\n", outfile);
  }

  fputs("</body>\n"
        "</html>\n", outfile);

 /*
  * Close files, reporting a failure to flush the output file...
  */

  if (infile != stdin)
    fclose(infile);

  if (outfile != stdout && fclose(outfile))
  {
    perror(argv[2]);
    return (1);
  }

 /*
  * Return with no errors...
  */

  return (0);
}
684
685
/*
 * 'putc_entity()' - Put a single character, using entities as needed.
 *
 * The HTML markup characters '&', '<', '>', and '"' are written as the
 * entities "&amp;", "&lt;", "&gt;", and "&quot;" so they cannot be mistaken
 * for markup; every other character is written unchanged with putc().
 */

static void
putc_entity(int  ch,			/* I - Character */
            FILE *fp)			/* I - File */
{
  switch (ch)
  {
    case '&' :
        fputs("&amp;", fp);
        break;

    case '<' :
        fputs("&lt;", fp);
        break;

    case '>' :
        fputs("&gt;", fp);
        break;

    case '\"' :
        fputs("&quot;", fp);
        break;

    default :
        putc(ch, fp);
        break;
  }
}
701
702
/*
 * 'strmove()' - Move characters within a string.
 *
 * Copies the nul-terminated string at "s", including the terminator, to
 * "d".  The two regions may overlap in either direction, which is why
 * memmove() is used instead of strcpy() or a simple forward copy loop.
 */

static void
strmove(char       *d,			/* I - Destination */
        const char *s)			/* I - Source */
{
  memmove(d, s, strlen(s) + 1);
}
716
717
718/*
719 * End of "$Id: mantohtml.c 11093 2013-07-03 20:48:42Z msweet $".
720 */
721