man.c revision 241675
1219820Sjeff/* $Id: man.c,v 1.115 2012/01/03 15:16:24 kristaps Exp $ */ 2219820Sjeff/* 3219820Sjeff * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4219820Sjeff * 5219820Sjeff * Permission to use, copy, modify, and distribute this software for any 6219820Sjeff * purpose with or without fee is hereby granted, provided that the above 7219820Sjeff * copyright notice and this permission notice appear in all copies. 8219820Sjeff * 9219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10219820Sjeff * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11219820Sjeff * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12219820Sjeff * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13219820Sjeff * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14219820Sjeff * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15219820Sjeff * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16219820Sjeff */ 17219820Sjeff#ifdef HAVE_CONFIG_H 18219820Sjeff#include "config.h" 19219820Sjeff#endif 20219820Sjeff 21219820Sjeff#include <sys/types.h> 22219820Sjeff 23219820Sjeff#include <assert.h> 24219820Sjeff#include <stdarg.h> 25219820Sjeff#include <stdlib.h> 26219820Sjeff#include <stdio.h> 27219820Sjeff#include <string.h> 28219820Sjeff 29219820Sjeff#include "man.h" 30219820Sjeff#include "mandoc.h" 31219820Sjeff#include "libman.h" 32219820Sjeff#include "libmandoc.h" 33219820Sjeff 34219820Sjeffconst char *const __man_macronames[MAN_MAX] = { 35219820Sjeff "br", "TH", "SH", "SS", 36219820Sjeff "TP", "LP", "PP", "P", 37219820Sjeff "IP", "HP", "SM", "SB", 38219820Sjeff "BI", "IB", "BR", "RB", 39219820Sjeff "R", "B", "I", "IR", 40219820Sjeff "RI", "na", "sp", "nf", 41219820Sjeff "fi", "RE", "RS", "DT", 42219820Sjeff "UC", "PD", "AT", "in", 43219820Sjeff "ft", "OP" 44219820Sjeff }; 45219820Sjeff 46219820Sjeffconst char * const *man_macronames = __man_macronames; 47219820Sjeff 48219820Sjeffstatic struct man_node *man_node_alloc(struct man *, int, int, 49219820Sjeff enum man_type, enum mant); 50219820Sjeffstatic int man_node_append(struct man *, 51219820Sjeff struct man_node *); 52219820Sjeffstatic void man_node_free(struct man_node *); 53219820Sjeffstatic void man_node_unlink(struct man *, 54219820Sjeff struct man_node *); 55219820Sjeffstatic int man_ptext(struct man *, int, char *, int); 56219820Sjeffstatic int man_pmacro(struct man *, int, char *, int); 57219820Sjeffstatic void man_free1(struct man *); 58219820Sjeffstatic void man_alloc1(struct man *); 59219820Sjeffstatic int man_descope(struct man *, int, int); 60219820Sjeff 61219820Sjeff 62219820Sjeffconst struct man_node * 63219820Sjeffman_node(const struct man *m) 64219820Sjeff{ 65219820Sjeff 66219820Sjeff assert( ! (MAN_HALT & m->flags)); 67219820Sjeff return(m->first); 68219820Sjeff} 69219820Sjeff 70219820Sjeff 71219820Sjeffconst struct man_meta * 72219820Sjeffman_meta(const struct man *m) 73219820Sjeff{ 74219820Sjeff 75219820Sjeff assert( ! (MAN_HALT & m->flags)); 76219820Sjeff return(&m->meta); 77219820Sjeff} 78219820Sjeff 79219820Sjeff 80219820Sjeffvoid 81219820Sjeffman_reset(struct man *man) 82219820Sjeff{ 83219820Sjeff 84219820Sjeff man_free1(man); 85219820Sjeff man_alloc1(man); 86219820Sjeff} 87219820Sjeff 88219820Sjeff 89219820Sjeffvoid 90219820Sjeffman_free(struct man *man) 91219820Sjeff{ 92219820Sjeff 93219820Sjeff man_free1(man); 94219820Sjeff free(man); 95219820Sjeff} 96219820Sjeff 97219820Sjeff 98219820Sjeffstruct man * 99219820Sjeffman_alloc(struct roff *roff, struct mparse *parse) 100219820Sjeff{ 101219820Sjeff struct man *p; 102219820Sjeff 103219820Sjeff p = mandoc_calloc(1, sizeof(struct man)); 104219820Sjeff 105219820Sjeff man_hash_init(); 106219820Sjeff p->parse = parse; 107219820Sjeff p->roff = roff; 108219820Sjeff 109219820Sjeff man_alloc1(p); 110219820Sjeff return(p); 111219820Sjeff} 112219820Sjeff 113219820Sjeff 114219820Sjeffint 115219820Sjeffman_endparse(struct man *m) 116219820Sjeff{ 117219820Sjeff 118219820Sjeff assert( ! (MAN_HALT & m->flags)); 119219820Sjeff if (man_macroend(m)) 120219820Sjeff return(1); 121219820Sjeff m->flags |= MAN_HALT; 122 return(0); 123} 124 125 126int 127man_parseln(struct man *m, int ln, char *buf, int offs) 128{ 129 130 m->flags |= MAN_NEWLINE; 131 132 assert( ! (MAN_HALT & m->flags)); 133 134 return (mandoc_getcontrol(buf, &offs) ? 135 man_pmacro(m, ln, buf, offs) : 136 man_ptext(m, ln, buf, offs)); 137} 138 139 140static void 141man_free1(struct man *man) 142{ 143 144 if (man->first) 145 man_node_delete(man, man->first); 146 if (man->meta.title) 147 free(man->meta.title); 148 if (man->meta.source) 149 free(man->meta.source); 150 if (man->meta.date) 151 free(man->meta.date); 152 if (man->meta.vol) 153 free(man->meta.vol); 154 if (man->meta.msec) 155 free(man->meta.msec); 156} 157 158 159static void 160man_alloc1(struct man *m) 161{ 162 163 memset(&m->meta, 0, sizeof(struct man_meta)); 164 m->flags = 0; 165 m->last = mandoc_calloc(1, sizeof(struct man_node)); 166 m->first = m->last; 167 m->last->type = MAN_ROOT; 168 m->last->tok = MAN_MAX; 169 m->next = MAN_NEXT_CHILD; 170} 171 172 173static int 174man_node_append(struct man *man, struct man_node *p) 175{ 176 177 assert(man->last); 178 assert(man->first); 179 assert(MAN_ROOT != p->type); 180 181 switch (man->next) { 182 case (MAN_NEXT_SIBLING): 183 man->last->next = p; 184 p->prev = man->last; 185 p->parent = man->last->parent; 186 break; 187 case (MAN_NEXT_CHILD): 188 man->last->child = p; 189 p->parent = man->last; 190 break; 191 default: 192 abort(); 193 /* NOTREACHED */ 194 } 195 196 assert(p->parent); 197 p->parent->nchild++; 198 199 if ( ! man_valid_pre(man, p)) 200 return(0); 201 202 switch (p->type) { 203 case (MAN_HEAD): 204 assert(MAN_BLOCK == p->parent->type); 205 p->parent->head = p; 206 break; 207 case (MAN_TAIL): 208 assert(MAN_BLOCK == p->parent->type); 209 p->parent->tail = p; 210 break; 211 case (MAN_BODY): 212 assert(MAN_BLOCK == p->parent->type); 213 p->parent->body = p; 214 break; 215 default: 216 break; 217 } 218 219 man->last = p; 220 221 switch (p->type) { 222 case (MAN_TBL): 223 /* FALLTHROUGH */ 224 case (MAN_TEXT): 225 if ( ! man_valid_post(man)) 226 return(0); 227 break; 228 default: 229 break; 230 } 231 232 return(1); 233} 234 235 236static struct man_node * 237man_node_alloc(struct man *m, int line, int pos, 238 enum man_type type, enum mant tok) 239{ 240 struct man_node *p; 241 242 p = mandoc_calloc(1, sizeof(struct man_node)); 243 p->line = line; 244 p->pos = pos; 245 p->type = type; 246 p->tok = tok; 247 248 if (MAN_NEWLINE & m->flags) 249 p->flags |= MAN_LINE; 250 m->flags &= ~MAN_NEWLINE; 251 return(p); 252} 253 254 255int 256man_elem_alloc(struct man *m, int line, int pos, enum mant tok) 257{ 258 struct man_node *p; 259 260 p = man_node_alloc(m, line, pos, MAN_ELEM, tok); 261 if ( ! man_node_append(m, p)) 262 return(0); 263 m->next = MAN_NEXT_CHILD; 264 return(1); 265} 266 267 268int 269man_tail_alloc(struct man *m, int line, int pos, enum mant tok) 270{ 271 struct man_node *p; 272 273 p = man_node_alloc(m, line, pos, MAN_TAIL, tok); 274 if ( ! man_node_append(m, p)) 275 return(0); 276 m->next = MAN_NEXT_CHILD; 277 return(1); 278} 279 280 281int 282man_head_alloc(struct man *m, int line, int pos, enum mant tok) 283{ 284 struct man_node *p; 285 286 p = man_node_alloc(m, line, pos, MAN_HEAD, tok); 287 if ( ! man_node_append(m, p)) 288 return(0); 289 m->next = MAN_NEXT_CHILD; 290 return(1); 291} 292 293 294int 295man_body_alloc(struct man *m, int line, int pos, enum mant tok) 296{ 297 struct man_node *p; 298 299 p = man_node_alloc(m, line, pos, MAN_BODY, tok); 300 if ( ! man_node_append(m, p)) 301 return(0); 302 m->next = MAN_NEXT_CHILD; 303 return(1); 304} 305 306 307int 308man_block_alloc(struct man *m, int line, int pos, enum mant tok) 309{ 310 struct man_node *p; 311 312 p = man_node_alloc(m, line, pos, MAN_BLOCK, tok); 313 if ( ! man_node_append(m, p)) 314 return(0); 315 m->next = MAN_NEXT_CHILD; 316 return(1); 317} 318 319int 320man_word_alloc(struct man *m, int line, int pos, const char *word) 321{ 322 struct man_node *n; 323 324 n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX); 325 n->string = roff_strdup(m->roff, word); 326 327 if ( ! man_node_append(m, n)) 328 return(0); 329 330 m->next = MAN_NEXT_SIBLING; 331 return(1); 332} 333 334 335/* 336 * Free all of the resources held by a node. This does NOT unlink a 337 * node from its context; for that, see man_node_unlink(). 338 */ 339static void 340man_node_free(struct man_node *p) 341{ 342 343 if (p->string) 344 free(p->string); 345 free(p); 346} 347 348 349void 350man_node_delete(struct man *m, struct man_node *p) 351{ 352 353 while (p->child) 354 man_node_delete(m, p->child); 355 356 man_node_unlink(m, p); 357 man_node_free(p); 358} 359 360int 361man_addeqn(struct man *m, const struct eqn *ep) 362{ 363 struct man_node *n; 364 365 assert( ! (MAN_HALT & m->flags)); 366 367 n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 368 n->eqn = ep; 369 370 if ( ! man_node_append(m, n)) 371 return(0); 372 373 m->next = MAN_NEXT_SIBLING; 374 return(man_descope(m, ep->ln, ep->pos)); 375} 376 377int 378man_addspan(struct man *m, const struct tbl_span *sp) 379{ 380 struct man_node *n; 381 382 assert( ! (MAN_HALT & m->flags)); 383 384 n = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX); 385 n->span = sp; 386 387 if ( ! man_node_append(m, n)) 388 return(0); 389 390 m->next = MAN_NEXT_SIBLING; 391 return(man_descope(m, sp->line, 0)); 392} 393 394static int 395man_descope(struct man *m, int line, int offs) 396{ 397 /* 398 * Co-ordinate what happens with having a next-line scope open: 399 * first close out the element scope (if applicable), then close 400 * out the block scope (also if applicable). 401 */ 402 403 if (MAN_ELINE & m->flags) { 404 m->flags &= ~MAN_ELINE; 405 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 406 return(0); 407 } 408 409 if ( ! (MAN_BLINE & m->flags)) 410 return(1); 411 m->flags &= ~MAN_BLINE; 412 413 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 414 return(0); 415 return(man_body_alloc(m, line, offs, m->last->tok)); 416} 417 418static int 419man_ptext(struct man *m, int line, char *buf, int offs) 420{ 421 int i; 422 423 /* Literal free-form text whitespace is preserved. */ 424 425 if (MAN_LITERAL & m->flags) { 426 if ( ! man_word_alloc(m, line, offs, buf + offs)) 427 return(0); 428 return(man_descope(m, line, offs)); 429 } 430 431 /* Pump blank lines directly into the backend. */ 432 433 for (i = offs; ' ' == buf[i]; i++) 434 /* Skip leading whitespace. */ ; 435 436 if ('\0' == buf[i]) { 437 /* Allocate a blank entry. */ 438 if ( ! man_word_alloc(m, line, offs, "")) 439 return(0); 440 return(man_descope(m, line, offs)); 441 } 442 443 /* 444 * Warn if the last un-escaped character is whitespace. Then 445 * strip away the remaining spaces (tabs stay!). 446 */ 447 448 i = (int)strlen(buf); 449 assert(i); 450 451 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 452 if (i > 1 && '\\' != buf[i - 2]) 453 man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE); 454 455 for (--i; i && ' ' == buf[i]; i--) 456 /* Spin back to non-space. */ ; 457 458 /* Jump ahead of escaped whitespace. */ 459 i += '\\' == buf[i] ? 2 : 1; 460 461 buf[i] = '\0'; 462 } 463 464 if ( ! man_word_alloc(m, line, offs, buf + offs)) 465 return(0); 466 467 /* 468 * End-of-sentence check. If the last character is an unescaped 469 * EOS character, then flag the node as being the end of a 470 * sentence. The front-end will know how to interpret this. 471 */ 472 473 assert(i); 474 if (mandoc_eos(buf, (size_t)i, 0)) 475 m->last->flags |= MAN_EOS; 476 477 return(man_descope(m, line, offs)); 478} 479 480static int 481man_pmacro(struct man *m, int ln, char *buf, int offs) 482{ 483 int i, ppos; 484 enum mant tok; 485 char mac[5]; 486 struct man_node *n; 487 488 if ('"' == buf[offs]) { 489 man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); 490 return(1); 491 } else if ('\0' == buf[offs]) 492 return(1); 493 494 ppos = offs; 495 496 /* 497 * Copy the first word into a nil-terminated buffer. 498 * Stop copying when a tab, space, or eoln is encountered. 499 */ 500 501 i = 0; 502 while (i < 4 && '\0' != buf[offs] && 503 ' ' != buf[offs] && '\t' != buf[offs]) 504 mac[i++] = buf[offs++]; 505 506 mac[i] = '\0'; 507 508 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 509 510 if (MAN_MAX == tok) { 511 mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln, 512 ppos, "%s", buf + ppos - 1); 513 return(1); 514 } 515 516 /* The macro is sane. Jump to the next word. */ 517 518 while (buf[offs] && ' ' == buf[offs]) 519 offs++; 520 521 /* 522 * Trailing whitespace. Note that tabs are allowed to be passed 523 * into the parser as "text", so we only warn about spaces here. 524 */ 525 526 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 527 man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); 528 529 /* 530 * Remove prior ELINE macro, as it's being clobbered by a new 531 * macro. Note that NSCOPED macros do not close out ELINE 532 * macros---they don't print text---so we let those slip by. 533 */ 534 535 if ( ! (MAN_NSCOPED & man_macros[tok].flags) && 536 m->flags & MAN_ELINE) { 537 n = m->last; 538 assert(MAN_TEXT != n->type); 539 540 /* Remove repeated NSCOPED macros causing ELINE. */ 541 542 if (MAN_NSCOPED & man_macros[n->tok].flags) 543 n = n->parent; 544 545 mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, 546 n->pos, "%s breaks %s", man_macronames[tok], 547 man_macronames[n->tok]); 548 549 man_node_delete(m, n); 550 m->flags &= ~MAN_ELINE; 551 } 552 553 /* 554 * Remove prior BLINE macro that is being clobbered. 555 */ 556 if ((m->flags & MAN_BLINE) && 557 (MAN_BSCOPE & man_macros[tok].flags)) { 558 n = m->last; 559 560 /* Might be a text node like 8 in 561 * .TP 8 562 * .SH foo 563 */ 564 if (MAN_TEXT == n->type) 565 n = n->parent; 566 567 /* Remove element that didn't end BLINE, if any. */ 568 if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) 569 n = n->parent; 570 571 assert(MAN_HEAD == n->type); 572 n = n->parent; 573 assert(MAN_BLOCK == n->type); 574 assert(MAN_SCOPED & man_macros[n->tok].flags); 575 576 mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, 577 n->pos, "%s breaks %s", man_macronames[tok], 578 man_macronames[n->tok]); 579 580 man_node_delete(m, n); 581 m->flags &= ~MAN_BLINE; 582 } 583 584 /* 585 * Save the fact that we're in the next-line for a block. In 586 * this way, embedded roff instructions can "remember" state 587 * when they exit. 588 */ 589 590 if (MAN_BLINE & m->flags) 591 m->flags |= MAN_BPLINE; 592 593 /* Call to handler... */ 594 595 assert(man_macros[tok].fp); 596 if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf)) 597 goto err; 598 599 /* 600 * We weren't in a block-line scope when entering the 601 * above-parsed macro, so return. 602 */ 603 604 if ( ! (MAN_BPLINE & m->flags)) { 605 m->flags &= ~MAN_ILINE; 606 return(1); 607 } 608 m->flags &= ~MAN_BPLINE; 609 610 /* 611 * If we're in a block scope, then allow this macro to slip by 612 * without closing scope around it. 613 */ 614 615 if (MAN_ILINE & m->flags) { 616 m->flags &= ~MAN_ILINE; 617 return(1); 618 } 619 620 /* 621 * If we've opened a new next-line element scope, then return 622 * now, as the next line will close out the block scope. 623 */ 624 625 if (MAN_ELINE & m->flags) 626 return(1); 627 628 /* Close out the block scope opened in the prior line. */ 629 630 assert(MAN_BLINE & m->flags); 631 m->flags &= ~MAN_BLINE; 632 633 if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) 634 return(0); 635 return(man_body_alloc(m, ln, ppos, m->last->tok)); 636 637err: /* Error out. */ 638 639 m->flags |= MAN_HALT; 640 return(0); 641} 642 643/* 644 * Unlink a node from its context. If "m" is provided, the last parse 645 * point will also be adjusted accordingly. 646 */ 647static void 648man_node_unlink(struct man *m, struct man_node *n) 649{ 650 651 /* Adjust siblings. */ 652 653 if (n->prev) 654 n->prev->next = n->next; 655 if (n->next) 656 n->next->prev = n->prev; 657 658 /* Adjust parent. */ 659 660 if (n->parent) { 661 n->parent->nchild--; 662 if (n->parent->child == n) 663 n->parent->child = n->prev ? n->prev : n->next; 664 } 665 666 /* Adjust parse point, if applicable. */ 667 668 if (m && m->last == n) { 669 /*XXX: this can occur when bailing from validation. */ 670 /*assert(NULL == n->next);*/ 671 if (n->prev) { 672 m->last = n->prev; 673 m->next = MAN_NEXT_SIBLING; 674 } else { 675 m->last = n->parent; 676 m->next = MAN_NEXT_CHILD; 677 } 678 } 679 680 if (m && m->first == n) 681 m->first = NULL; 682} 683 684const struct mparse * 685man_mparse(const struct man *m) 686{ 687 688 assert(m && m->parse); 689 return(m->parse); 690} 691