1/* 2 * "$Id: mantohtml.c 11093 2013-07-03 20:48:42Z msweet $" 3 * 4 * Man page to HTML conversion program. 5 * 6 * Copyright 2007-2010 by Apple Inc. 7 * Copyright 2004-2006 by Easy Software Products. 8 * 9 * These coded instructions, statements, and computer programs are the 10 * property of Apple Inc. and are protected by Federal copyright 11 * law. Distribution and use rights are outlined in the file "LICENSE.txt" 12 * which should have been included with this file. If this file is 13 * file is missing or damaged, see the license at "http://www.cups.org/". 14 * 15 * Contents: 16 * 17 * main() - Convert a man page to HTML. 18 * putc_entity() - Put a single character, using entities as needed. 19 * strmove() - Move characters within a string. 20 */ 21 22/* 23 * Include necessary headers. 24 */ 25 26#include <cups/string-private.h> 27#include <unistd.h> 28 29 30/* 31 * Local functions... 32 */ 33 34static void putc_entity(int ch, FILE *fp); 35static void strmove(char *d, const char *s); 36 37 38/* 39 * 'main()' - Convert a man page to HTML. 40 */ 41 42int /* O - Exit status */ 43main(int argc, /* I - Number of command-line args */ 44 char *argv[]) /* I - Command-line arguments */ 45{ 46 FILE *infile, /* Input file */ 47 *outfile; /* Output file */ 48 char line[1024], /* Line from file */ 49 *lineptr, /* Pointer into line */ 50 *endptr, /* Pointer to end of current */ 51 endchar, /* End character */ 52 *paren, /* Pointer to parenthesis */ 53 name[1024]; /* Man page name */ 54 int section, /* Man page section */ 55 pre, /* Preformatted */ 56 font, /* Current font */ 57 blist, /* In a bullet list? */ 58 list, /* In a list? */ 59 linenum; /* Current line number */ 60 const char *post; /* Text to add after the current line */ 61 static const char /* Start/end tags for fonts */ 62 * const start_fonts[] = { "", "<b>", "<i>" }, 63 * const end_fonts[] = { "", "</b>", "</i>" }; 64 65 /* 66 * Check arguments... 67 */ 68 69 if (argc > 3) 70 { 71 fputs("Usage: mantohtml [filename.man [filename.html]]\n", stderr); 72 return (1); 73 } 74 75 /* 76 * Open files as needed... 77 */ 78 79 if (argc > 1) 80 { 81 if ((infile = fopen(argv[1], "r")) == NULL) 82 { 83 perror(argv[1]); 84 return (1); 85 } 86 } 87 else 88 infile = stdin; 89 90 if (argc > 2) 91 { 92 if ((outfile = fopen(argv[2], "w")) == NULL) 93 { 94 perror(argv[2]); 95 fclose(infile); 96 return (1); 97 } 98 } 99 else 100 outfile = stdout; 101 102 /* 103 * Read from input and write the output... 104 */ 105 106 fputs("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" " 107 "\"http://www.w3.org/TR/html4/loose.dtd\">\n" 108 "<html>\n" 109 "<!-- SECTION: Man Pages -->\n" 110 "<head>\n" 111 "\t<link rel=\"stylesheet\" type=\"text/css\" " 112 "href=\"../cups-printable.css\">\n", outfile); 113 114 blist = 0; 115 font = 0; 116 list = 0; 117 linenum = 0; 118 pre = 0; 119 post = NULL; 120 section = -1; 121 122 while (fgets(line, sizeof(line), infile)) 123 { 124 linenum ++; 125 126 if (line[0] == '.') 127 { 128 /* 129 * Strip leading whitespace... 130 */ 131 132 while (line[1] == ' ' || line[1] == '\t') 133 strmove(line + 1, line + 2); 134 135 /* 136 * Process man page commands... 137 */ 138 139 if (!strncmp(line, ".TH ", 4) && section < 0) 140 { 141 /* 142 * Grab man page title... 143 */ 144 145 sscanf(line + 4, "%s%d", name, §ion); 146 147 fprintf(outfile, 148 "\t<title>%s(%d)</title>\n" 149 "</head>\n" 150 "<body>\n" 151 "<h1 class=\"title\">%s(%d)</h1>\n" 152 "%s", 153 name, section, name, section, start_fonts[font]); 154 } 155 else if (section < 0) 156 continue; 157 else if (!strncmp(line, ".SH ", 4) || !strncmp(line, ".SS ", 4)) 158 { 159 /* 160 * Grab heading... 161 */ 162 163 int first = 1; 164 165 fputs(end_fonts[font], outfile); 166 167 if (blist) 168 { 169 fputs("</li>\n</ul>\n", outfile); 170 blist = 0; 171 } 172 173 if (list) 174 { 175 if (list == 1) 176 fputs("</dt>\n", outfile); 177 else if (list) 178 fputs("</dd>\n", outfile); 179 180 fputs("</dl>\n", outfile); 181 list = 0; 182 } 183 184 line[strlen(line) - 1] = '\0'; /* Strip LF */ 185 186 if (line[2] == 'H') 187 fputs("<h2 class=\"title\"><a name=\"", outfile); 188 else 189 fputs("<h3><a name=\"", outfile); 190 191 for (lineptr = line + 4; *lineptr; lineptr ++) 192 if (*lineptr == '\"') 193 continue; 194 else if (*lineptr == ' ') 195 putc_entity('_', outfile); 196 else 197 putc_entity(*lineptr, outfile); 198 199 fputs("\">", outfile); 200 201 for (lineptr = line + 4; *lineptr; lineptr ++) 202 if (*lineptr == '\"') 203 continue; 204 else if (*lineptr == ' ') 205 { 206 putc_entity(' ', outfile); 207 208 first = 1; 209 } 210 else 211 { 212 if (first) 213 putc_entity(*lineptr, outfile); 214 else 215 putc_entity(tolower(*lineptr), outfile); 216 217 first = 0; 218 } 219 220 if (line[2] == 'H') 221 fprintf(outfile, "</a></h2>\n%s", start_fonts[font]); 222 else 223 fprintf(outfile, "</a></h3>\n%s", start_fonts[font]); 224 } 225 else if (!strncmp(line, ".LP", 3) || !strncmp(line, ".PP", 3)) 226 { 227 /* 228 * New paragraph... 229 */ 230 231 fputs(end_fonts[font], outfile); 232 233 if (blist) 234 { 235 fputs("</li>\n</ul>\n", outfile); 236 blist = 0; 237 } 238 239 if (list) 240 { 241 if (list == 1) 242 fputs("</dt>\n", outfile); 243 else if (list) 244 fputs("</dd>\n", outfile); 245 246 fputs("</dl>\n", outfile); 247 list = 0; 248 } 249 250 fputs("<p>", outfile); 251 font = 0; 252 } 253 else if (!strncmp(line, ".TP ", 4)) 254 { 255 /* 256 * Grab list... 257 */ 258 259 fputs(end_fonts[font], outfile); 260 261 if (blist) 262 { 263 fputs("</li>\n</ul>\n", outfile); 264 blist = 0; 265 } 266 267 if (!list) 268 fputs("<dl>\n", outfile); 269 else if (list == 1) 270 fputs("</dt>\n", outfile); 271 else if (list) 272 fputs("</dd>\n", outfile); 273 274 fputs("<dt>", outfile); 275 list = 1; 276 font = 0; 277 } 278 else if (!strncmp(line, ".br", 3)) 279 { 280 /* 281 * Grab line break... 282 */ 283 284 if (list == 1) 285 { 286 fputs("</dt>\n<dd>", outfile); 287 list = 2; 288 } 289 else if (list) 290 fputs("</dd>\n<dd>", outfile); 291 else 292 fputs("<br>\n", outfile); 293 } 294 else if (!strncmp(line, ".de ", 4)) 295 { 296 /* 297 * Define macro - ignore... 298 */ 299 300 while (fgets(line, sizeof(line), infile)) 301 { 302 linenum ++; 303 304 if (!strncmp(line, "..", 2)) 305 break; 306 } 307 } 308 else if (!strncmp(line, ".RS", 3)) 309 { 310 /* 311 * Indent... 312 */ 313 314 fputs("<div style='margin-left: 3em;'>\n", outfile); 315 } 316 else if (!strncmp(line, ".RE", 3)) 317 { 318 /* 319 * Unindent... 320 */ 321 322 fputs("</div>\n", outfile); 323 } 324 else if (!strncmp(line, ".ds ", 4) || !strncmp(line, ".rm ", 4) || 325 !strncmp(line, ".tr ", 4) || !strncmp(line, ".hy ", 4) || 326 !strncmp(line, ".IX ", 4) || !strncmp(line, ".PD", 3) || 327 !strncmp(line, ".Sp", 3)) 328 { 329 /* 330 * Ignore unused commands... 331 */ 332 } 333 else if (!strncmp(line, ".Vb", 3) || !strncmp(line, ".nf", 3)) 334 { 335 /* 336 * Start preformatted... 337 */ 338 339 pre = 1; 340 fputs("<pre>\n", outfile); 341 } 342 else if (!strncmp(line, ".Ve", 3) || !strncmp(line, ".fi", 3)) 343 { 344 /* 345 * End preformatted... 346 */ 347 348 if (pre) 349 { 350 pre = 0; 351 fputs("</pre>\n", outfile); 352 } 353 } 354 else if (!strncmp(line, ".IP \\(bu", 8)) 355 { 356 /* 357 * Bullet list... 358 */ 359 360 if (blist) 361 fputs("</li>\n", outfile); 362 else 363 { 364 fputs("<ul>\n", outfile); 365 blist = 1; 366 } 367 368 fputs("<li>", outfile); 369 } 370 else if (!strncmp(line, ".IP ", 4)) 371 { 372 /* 373 * Indented paragraph... 374 */ 375 376 if (blist) 377 { 378 fputs("</li>\n</ul>\n", outfile); 379 blist = 0; 380 } 381 382 fputs("<p style='margin-left: 3em;'>", outfile); 383 384 for (lineptr = line + 4; isspace(*lineptr); lineptr ++); 385 386 if (*lineptr == '\"') 387 { 388 strmove(line, lineptr + 1); 389 390 if ((lineptr = strchr(line, '\"')) != NULL) 391 *lineptr = '\0'; 392 } 393 else 394 { 395 strmove(line, lineptr); 396 397 if ((lineptr = strchr(line, ' ')) != NULL) 398 *lineptr = '\0'; 399 } 400 401 /* 402 * Process the text as if it was in-line... 403 */ 404 405 post = "\n<br>\n<br>"; 406 goto process_text; 407 } 408 else if (!strncmp(line, ".\\}", 3)) 409 { 410 /* 411 * Ignore close block... 412 */ 413 } 414 else if (!strncmp(line, ".ie", 3) || !strncmp(line, ".if", 3) || 415 !strncmp(line, ".el", 3)) 416 { 417 /* 418 * If/else - ignore... 419 */ 420 421 if (strchr(line, '{') != NULL) 422 { 423 /* 424 * Skip whole block... 425 */ 426 427 while (fgets(line, sizeof(line), infile)) 428 { 429 linenum ++; 430 431 if (strchr(line, '}') != NULL) 432 break; 433 } 434 } 435 } 436#if 0 437 else if (!strncmp(line, ". ", 4)) 438 { 439 /* 440 * Grab ... 441 */ 442 } 443#endif /* 0 */ 444 else if (!strncmp(line, ".B ", 3)) 445 { 446 /* 447 * Grab bold text... 448 */ 449 450 fprintf(outfile, "%s<b>%s</b>%s", end_fonts[font], line + 3, 451 start_fonts[font]); 452 } 453 else if (!strncmp(line, ".I ", 3)) 454 { 455 /* 456 * Grab italic text... 457 */ 458 459 fprintf(outfile, "%s<i>%s</i>%s", end_fonts[font], line + 3, 460 start_fonts[font]); 461 } 462 else if (strncmp(line, ".\\\"", 3)) 463 { 464 /* 465 * Unknown... 466 */ 467 468 if ((lineptr = strchr(line, ' ')) != NULL) 469 *lineptr = '\0'; 470 else if ((lineptr = strchr(line, '\n')) != NULL) 471 *lineptr = '\0'; 472 473 fprintf(stderr, "mantohtml: Unknown man page command \'%s\' on line %d!\n", 474 line, linenum); 475 } 476 477 /* 478 * Skip continuation lines... 479 */ 480 481 lineptr = line + strlen(line) - 2; 482 if (lineptr >= line && *lineptr == '\\') 483 { 484 while (fgets(line, sizeof(line), infile)) 485 { 486 linenum ++; 487 lineptr = line + strlen(line) - 2; 488 489 if (lineptr < line || *lineptr != '\\') 490 break; 491 } 492 } 493 } 494 else 495 { 496 /* 497 * Process man page text... 498 */ 499 500process_text: 501 502 for (lineptr = line; *lineptr; lineptr ++) 503 { 504 if (!strncmp(lineptr, "http://", 7)) 505 { 506 /* 507 * Embed URL... 508 */ 509 510 for (endptr = lineptr + 7; 511 *endptr && !isspace(*endptr & 255); 512 endptr ++); 513 514 endchar = *endptr; 515 *endptr = '\0'; 516 517 fprintf(outfile, "<a href='%s'>%s</a>", lineptr, lineptr); 518 *endptr = endchar; 519 lineptr = endptr - 1; 520 } 521 else if (!strncmp(lineptr, "\\fI", 3) && 522 (endptr = strstr(lineptr, "\\fR")) != NULL && 523 (paren = strchr(lineptr, '(')) != NULL && 524 paren < endptr) 525 { 526 /* 527 * Link to man page? 528 */ 529 530 char manfile[1024], /* Man page filename */ 531 manurl[1024]; /* Man page URL */ 532 533 534 /* 535 * See if the man file is available locally... 536 */ 537 538 lineptr += 3; 539 endchar = *paren; 540 *paren = '\0'; 541 542 snprintf(manfile, sizeof(manfile), "%s.man", lineptr); 543 snprintf(manurl, sizeof(manurl), "man-%s.html?TOPIC=Man+Pages", 544 lineptr); 545 546 *paren = endchar; 547 endchar = *endptr; 548 *endptr = '\0'; 549 550 if (access(manfile, 0)) 551 { 552 /* 553 * Not a local man page, just do it italic... 554 */ 555 556 fputs("<i>", outfile); 557 while (*lineptr) 558 putc_entity(*lineptr++, outfile); 559 fputs("</i>", outfile); 560 } 561 else 562 { 563 /* 564 * Local man page, do a link... 565 */ 566 567 fprintf(outfile, "<a href='%s'>", manurl); 568 while (*lineptr) 569 putc_entity(*lineptr++, outfile); 570 fputs("</a>", outfile); 571 } 572 573 *endptr = endchar; 574 lineptr = endptr + 2; 575 } 576 else if (*lineptr == '\\') 577 { 578 lineptr ++; 579 if (!*lineptr) 580 break; 581 else if (isdigit(lineptr[0]) && isdigit(lineptr[1]) && 582 isdigit(lineptr[2])) 583 { 584 fprintf(outfile, "&#%d;", ((lineptr[0] - '0') * 8 + 585 lineptr[1] - '0') * 8 + 586 lineptr[2] - '0'); 587 lineptr += 2; 588 } 589 else if (*lineptr == '&') 590 continue; 591 else if (*lineptr == 's') 592 { 593 while (lineptr[1] == '-' || isdigit(lineptr[1])) 594 lineptr ++; 595 } 596 else if (*lineptr == '*') 597 { 598 lineptr += 2; 599 } 600 else if (*lineptr != 'f') 601 putc_entity(*lineptr, outfile); 602 else 603 { 604 lineptr ++; 605 if (!*lineptr) 606 break; 607 else 608 { 609 fputs(end_fonts[font], outfile); 610 611 switch (*lineptr) 612 { 613 default : /* Regular */ 614 font = 0; 615 break; 616 case 'B' : /* Bold */ 617 case 'b' : 618 font = 1; 619 break; 620 case 'I' : /* Italic */ 621 case 'i' : 622 font = 2; 623 break; 624 } 625 626 fputs(start_fonts[font], outfile); 627 } 628 } 629 } 630 else 631 putc_entity(*lineptr, outfile); 632 } 633 634 if (post) 635 { 636 fputs(post, outfile); 637 post = NULL; 638 } 639 640 if (list == 1) 641 { 642 fputs("</dt>\n<dd>", outfile); 643 list = 2; 644 } 645 } 646 } 647 648 fprintf(outfile, "%s\n", end_fonts[font]); 649 650 if (blist) 651 { 652 fputs("</li>\n</ul>\n", outfile); 653 } 654 655 if (list) 656 { 657 if (list == 1) 658 fputs("</dt>\n", outfile); 659 else if (list) 660 fputs("</dd>\n", outfile); 661 662 fputs("</dl>\n", outfile); 663 } 664 665 fputs("</body>\n" 666 "</html>\n", outfile); 667 668 /* 669 * Close files... 670 */ 671 672 if (infile != stdin) 673 fclose(infile); 674 675 if (outfile != stdout) 676 fclose(outfile); 677 678 /* 679 * Return with no errors... 680 */ 681 682 return (0); 683} 684 685 686/* 687 * 'putc_entity()' - Put a single character, using entities as needed. 688 */ 689 690static void 691putc_entity(int ch, /* I - Character */ 692 FILE *fp) /* I - File */ 693{ 694 if (ch == '&') 695 fputs("&", fp); 696 else if (ch == '<') 697 fputs("<", fp); 698 else 699 putc(ch, fp); 700} 701 702 703/* 704 * 'strmove()' - Move characters within a string. 705 */ 706 707static void 708strmove(char *d, /* I - Destination */ 709 const char *s) /* I - Source */ 710{ 711 while (*s) 712 *d++ = *s++; 713 714 *d = '\0'; 715} 716 717 718/* 719 * End of "$Id: mantohtml.c 11093 2013-07-03 20:48:42Z msweet $". 720 */ 721