/* mdoc.c revision 1.111 */
1/* $Id: mdoc.c,v 1.111 2014/08/08 15:32:17 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/types.h> 19 20#include <assert.h> 21#include <ctype.h> 22#include <stdarg.h> 23#include <stdio.h> 24#include <stdlib.h> 25#include <string.h> 26#include <time.h> 27 28#include "mdoc.h" 29#include "mandoc.h" 30#include "mandoc_aux.h" 31#include "libmdoc.h" 32#include "libmandoc.h" 33 34const char *const __mdoc_macronames[MDOC_MAX + 1] = { 35 "Ap", "Dd", "Dt", "Os", 36 "Sh", "Ss", "Pp", "D1", 37 "Dl", "Bd", "Ed", "Bl", 38 "El", "It", "Ad", "An", 39 "Ar", "Cd", "Cm", "Dv", 40 "Er", "Ev", "Ex", "Fa", 41 "Fd", "Fl", "Fn", "Ft", 42 "Ic", "In", "Li", "Nd", 43 "Nm", "Op", "Ot", "Pa", 44 "Rv", "St", "Va", "Vt", 45 "Xr", "%A", "%B", "%D", 46 "%I", "%J", "%N", "%O", 47 "%P", "%R", "%T", "%V", 48 "Ac", "Ao", "Aq", "At", 49 "Bc", "Bf", "Bo", "Bq", 50 "Bsx", "Bx", "Db", "Dc", 51 "Do", "Dq", "Ec", "Ef", 52 "Em", "Eo", "Fx", "Ms", 53 "No", "Ns", "Nx", "Ox", 54 "Pc", "Pf", "Po", "Pq", 55 "Qc", "Ql", "Qo", "Qq", 56 "Re", "Rs", "Sc", "So", 57 "Sq", "Sm", "Sx", "Sy", 58 "Tn", "Ux", "Xc", "Xo", 59 "Fo", "Fc", 
"Oo", "Oc", 60 "Bk", "Ek", "Bt", "Hf", 61 "Fr", "Ud", "Lb", "Lp", 62 "Lk", "Mt", "Brq", "Bro", 63 "Brc", "%C", "Es", "En", 64 "Dx", "%Q", "br", "sp", 65 "%U", "Ta", "ll", "text", 66 }; 67 68const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 69 "split", "nosplit", "ragged", 70 "unfilled", "literal", "file", 71 "offset", "bullet", "dash", 72 "hyphen", "item", "enum", 73 "tag", "diag", "hang", 74 "ohang", "inset", "column", 75 "width", "compact", "std", 76 "filled", "words", "emphasis", 77 "symbolic", "nested", "centered" 78 }; 79 80const char * const *mdoc_macronames = __mdoc_macronames; 81const char * const *mdoc_argnames = __mdoc_argnames; 82 83static void mdoc_node_free(struct mdoc_node *); 84static void mdoc_node_unlink(struct mdoc *, 85 struct mdoc_node *); 86static void mdoc_free1(struct mdoc *); 87static void mdoc_alloc1(struct mdoc *); 88static struct mdoc_node *node_alloc(struct mdoc *, int, int, 89 enum mdoct, enum mdoc_type); 90static int node_append(struct mdoc *, 91 struct mdoc_node *); 92#if 0 93static int mdoc_preptext(struct mdoc *, int, char *, int); 94#endif 95static int mdoc_ptext(struct mdoc *, int, char *, int); 96static int mdoc_pmacro(struct mdoc *, int, char *, int); 97 98 99const struct mdoc_node * 100mdoc_node(const struct mdoc *mdoc) 101{ 102 103 return(mdoc->first); 104} 105 106const struct mdoc_meta * 107mdoc_meta(const struct mdoc *mdoc) 108{ 109 110 return(&mdoc->meta); 111} 112 113/* 114 * Frees volatile resources (parse tree, meta-data, fields). 
115 */ 116static void 117mdoc_free1(struct mdoc *mdoc) 118{ 119 120 if (mdoc->first) 121 mdoc_node_delete(mdoc, mdoc->first); 122 if (mdoc->meta.title) 123 free(mdoc->meta.title); 124 if (mdoc->meta.os) 125 free(mdoc->meta.os); 126 if (mdoc->meta.name) 127 free(mdoc->meta.name); 128 if (mdoc->meta.arch) 129 free(mdoc->meta.arch); 130 if (mdoc->meta.vol) 131 free(mdoc->meta.vol); 132 if (mdoc->meta.msec) 133 free(mdoc->meta.msec); 134 if (mdoc->meta.date) 135 free(mdoc->meta.date); 136} 137 138/* 139 * Allocate all volatile resources (parse tree, meta-data, fields). 140 */ 141static void 142mdoc_alloc1(struct mdoc *mdoc) 143{ 144 145 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 146 mdoc->flags = 0; 147 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 148 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 149 mdoc->first = mdoc->last; 150 mdoc->last->type = MDOC_ROOT; 151 mdoc->last->tok = MDOC_MAX; 152 mdoc->next = MDOC_NEXT_CHILD; 153} 154 155/* 156 * Free up volatile resources (see mdoc_free1()) then re-initialises the 157 * data with mdoc_alloc1(). After invocation, parse data has been reset 158 * and the parser is ready for re-invocation on a new tree; however, 159 * cross-parse non-volatile data is kept intact. 160 */ 161void 162mdoc_reset(struct mdoc *mdoc) 163{ 164 165 mdoc_free1(mdoc); 166 mdoc_alloc1(mdoc); 167} 168 169/* 170 * Completely free up all volatile and non-volatile parse resources. 171 * After invocation, the pointer is no longer usable. 172 */ 173void 174mdoc_free(struct mdoc *mdoc) 175{ 176 177 mdoc_free1(mdoc); 178 free(mdoc); 179} 180 181/* 182 * Allocate volatile and non-volatile parse resources. 
183 */ 184struct mdoc * 185mdoc_alloc(struct roff *roff, struct mparse *parse, 186 const char *defos, int quick) 187{ 188 struct mdoc *p; 189 190 p = mandoc_calloc(1, sizeof(struct mdoc)); 191 192 p->parse = parse; 193 p->defos = defos; 194 p->quick = quick; 195 p->roff = roff; 196 197 mdoc_hash_init(); 198 mdoc_alloc1(p); 199 return(p); 200} 201 202int 203mdoc_endparse(struct mdoc *mdoc) 204{ 205 206 return(mdoc_macroend(mdoc)); 207} 208 209int 210mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 211{ 212 struct mdoc_node *n; 213 214 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 215 n->eqn = ep; 216 217 if ( ! node_append(mdoc, n)) 218 return(0); 219 220 mdoc->next = MDOC_NEXT_SIBLING; 221 return(1); 222} 223 224int 225mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 226{ 227 struct mdoc_node *n; 228 229 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 230 n->span = sp; 231 232 if ( ! node_append(mdoc, n)) 233 return(0); 234 235 mdoc->next = MDOC_NEXT_SIBLING; 236 return(1); 237} 238 239/* 240 * Main parse routine. Parses a single line -- really just hands off to 241 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 242 */ 243int 244mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 245{ 246 247 mdoc->flags |= MDOC_NEWLINE; 248 249 /* 250 * Let the roff nS register switch SYNOPSIS mode early, 251 * such that the parser knows at all times 252 * whether this mode is on or off. 253 * Note that this mode is also switched by the Sh macro. 254 */ 255 if (roff_getreg(mdoc->roff, "nS")) 256 mdoc->flags |= MDOC_SYNOPSIS; 257 else 258 mdoc->flags &= ~MDOC_SYNOPSIS; 259 260 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 261 mdoc_pmacro(mdoc, ln, buf, offs) : 262 mdoc_ptext(mdoc, ln, buf, offs)); 263} 264 265int 266mdoc_macro(MACRO_PROT_ARGS) 267{ 268 assert(tok < MDOC_MAX); 269 270 /* If we're in the body, deny prologue calls. 
*/ 271 272 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 273 MDOC_PBODY & mdoc->flags) { 274 mandoc_vmsg(MANDOCERR_PROLOG_ONLY, mdoc->parse, 275 line, ppos, "%s", mdoc_macronames[tok]); 276 return(1); 277 } 278 279 /* If we're in the prologue, deny "body" macros. */ 280 281 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 282 ! (MDOC_PBODY & mdoc->flags)) { 283 mandoc_vmsg(MANDOCERR_PROLOG_BAD, mdoc->parse, 284 line, ppos, "%s", mdoc_macronames[tok]); 285 if (NULL == mdoc->meta.msec) 286 mdoc->meta.msec = mandoc_strdup("1"); 287 if (NULL == mdoc->meta.title) 288 mdoc->meta.title = mandoc_strdup("UNKNOWN"); 289 if (NULL == mdoc->meta.vol) 290 mdoc->meta.vol = mandoc_strdup("LOCAL"); 291 if (NULL == mdoc->meta.os) 292 mdoc->meta.os = mandoc_strdup("LOCAL"); 293 if (NULL == mdoc->meta.date) 294 mdoc->meta.date = mandoc_normdate 295 (mdoc->parse, NULL, line, ppos); 296 mdoc->flags |= MDOC_PBODY; 297 } 298 299 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 300} 301 302 303static int 304node_append(struct mdoc *mdoc, struct mdoc_node *p) 305{ 306 307 assert(mdoc->last); 308 assert(mdoc->first); 309 assert(MDOC_ROOT != p->type); 310 311 switch (mdoc->next) { 312 case MDOC_NEXT_SIBLING: 313 mdoc->last->next = p; 314 p->prev = mdoc->last; 315 p->parent = mdoc->last->parent; 316 break; 317 case MDOC_NEXT_CHILD: 318 mdoc->last->child = p; 319 p->parent = mdoc->last; 320 break; 321 default: 322 abort(); 323 /* NOTREACHED */ 324 } 325 326 p->parent->nchild++; 327 328 /* 329 * Copy over the normalised-data pointer of our parent. Not 330 * everybody has one, but copying a null pointer is fine. 331 */ 332 333 switch (p->type) { 334 case MDOC_BODY: 335 if (ENDBODY_NOT != p->end) 336 break; 337 /* FALLTHROUGH */ 338 case MDOC_TAIL: 339 /* FALLTHROUGH */ 340 case MDOC_HEAD: 341 p->norm = p->parent->norm; 342 break; 343 default: 344 break; 345 } 346 347 if ( ! 
mdoc_valid_pre(mdoc, p)) 348 return(0); 349 350 switch (p->type) { 351 case MDOC_HEAD: 352 assert(MDOC_BLOCK == p->parent->type); 353 p->parent->head = p; 354 break; 355 case MDOC_TAIL: 356 assert(MDOC_BLOCK == p->parent->type); 357 p->parent->tail = p; 358 break; 359 case MDOC_BODY: 360 if (p->end) 361 break; 362 assert(MDOC_BLOCK == p->parent->type); 363 p->parent->body = p; 364 break; 365 default: 366 break; 367 } 368 369 mdoc->last = p; 370 371 switch (p->type) { 372 case MDOC_TBL: 373 /* FALLTHROUGH */ 374 case MDOC_TEXT: 375 if ( ! mdoc_valid_post(mdoc)) 376 return(0); 377 break; 378 default: 379 break; 380 } 381 382 return(1); 383} 384 385static struct mdoc_node * 386node_alloc(struct mdoc *mdoc, int line, int pos, 387 enum mdoct tok, enum mdoc_type type) 388{ 389 struct mdoc_node *p; 390 391 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 392 p->sec = mdoc->lastsec; 393 p->line = line; 394 p->pos = pos; 395 p->lastline = line; 396 p->tok = tok; 397 p->type = type; 398 399 /* Flag analysis. */ 400 401 if (MDOC_SYNOPSIS & mdoc->flags) 402 p->flags |= MDOC_SYNPRETTY; 403 else 404 p->flags &= ~MDOC_SYNPRETTY; 405 if (MDOC_NEWLINE & mdoc->flags) 406 p->flags |= MDOC_LINE; 407 mdoc->flags &= ~MDOC_NEWLINE; 408 409 return(p); 410} 411 412int 413mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 414{ 415 struct mdoc_node *p; 416 417 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 418 if ( ! node_append(mdoc, p)) 419 return(0); 420 mdoc->next = MDOC_NEXT_CHILD; 421 return(1); 422} 423 424int 425mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 426{ 427 struct mdoc_node *p; 428 429 assert(mdoc->first); 430 assert(mdoc->last); 431 432 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 433 if ( ! 
node_append(mdoc, p)) 434 return(0); 435 mdoc->next = MDOC_NEXT_CHILD; 436 return(1); 437} 438 439int 440mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 441{ 442 struct mdoc_node *p; 443 444 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 445 if ( ! node_append(mdoc, p)) 446 return(0); 447 mdoc->next = MDOC_NEXT_CHILD; 448 return(1); 449} 450 451int 452mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 453 struct mdoc_node *body, enum mdoc_endbody end) 454{ 455 struct mdoc_node *p; 456 457 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 458 p->pending = body; 459 p->norm = body->norm; 460 p->end = end; 461 if ( ! node_append(mdoc, p)) 462 return(0); 463 mdoc->next = MDOC_NEXT_SIBLING; 464 return(1); 465} 466 467int 468mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 469 enum mdoct tok, struct mdoc_arg *args) 470{ 471 struct mdoc_node *p; 472 473 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 474 p->args = args; 475 if (p->args) 476 (args->refcnt)++; 477 478 switch (tok) { 479 case MDOC_Bd: 480 /* FALLTHROUGH */ 481 case MDOC_Bf: 482 /* FALLTHROUGH */ 483 case MDOC_Bl: 484 /* FALLTHROUGH */ 485 case MDOC_En: 486 /* FALLTHROUGH */ 487 case MDOC_Rs: 488 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 489 break; 490 default: 491 break; 492 } 493 494 if ( ! node_append(mdoc, p)) 495 return(0); 496 mdoc->next = MDOC_NEXT_CHILD; 497 return(1); 498} 499 500int 501mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 502 enum mdoct tok, struct mdoc_arg *args) 503{ 504 struct mdoc_node *p; 505 506 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 507 p->args = args; 508 if (p->args) 509 (args->refcnt)++; 510 511 switch (tok) { 512 case MDOC_An: 513 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 514 break; 515 default: 516 break; 517 } 518 519 if ( ! 
node_append(mdoc, p)) 520 return(0); 521 mdoc->next = MDOC_NEXT_CHILD; 522 return(1); 523} 524 525int 526mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 527{ 528 struct mdoc_node *n; 529 530 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 531 n->string = roff_strdup(mdoc->roff, p); 532 533 if ( ! node_append(mdoc, n)) 534 return(0); 535 536 mdoc->next = MDOC_NEXT_SIBLING; 537 return(1); 538} 539 540void 541mdoc_word_append(struct mdoc *mdoc, const char *p) 542{ 543 struct mdoc_node *n; 544 char *addstr, *newstr; 545 546 n = mdoc->last; 547 addstr = roff_strdup(mdoc->roff, p); 548 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 549 free(addstr); 550 free(n->string); 551 n->string = newstr; 552 mdoc->next = MDOC_NEXT_SIBLING; 553} 554 555static void 556mdoc_node_free(struct mdoc_node *p) 557{ 558 559 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 560 free(p->norm); 561 if (p->string) 562 free(p->string); 563 if (p->args) 564 mdoc_argv_free(p->args); 565 free(p); 566} 567 568static void 569mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 570{ 571 572 /* Adjust siblings. */ 573 574 if (n->prev) 575 n->prev->next = n->next; 576 if (n->next) 577 n->next->prev = n->prev; 578 579 /* Adjust parent. */ 580 581 if (n->parent) { 582 n->parent->nchild--; 583 if (n->parent->child == n) 584 n->parent->child = n->prev ? n->prev : n->next; 585 if (n->parent->last == n) 586 n->parent->last = n->prev ? n->prev : NULL; 587 } 588 589 /* Adjust parse point, if applicable. 
*/ 590 591 if (mdoc && mdoc->last == n) { 592 if (n->prev) { 593 mdoc->last = n->prev; 594 mdoc->next = MDOC_NEXT_SIBLING; 595 } else { 596 mdoc->last = n->parent; 597 mdoc->next = MDOC_NEXT_CHILD; 598 } 599 } 600 601 if (mdoc && mdoc->first == n) 602 mdoc->first = NULL; 603} 604 605void 606mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 607{ 608 609 while (p->child) { 610 assert(p->nchild); 611 mdoc_node_delete(mdoc, p->child); 612 } 613 assert(0 == p->nchild); 614 615 mdoc_node_unlink(mdoc, p); 616 mdoc_node_free(p); 617} 618 619int 620mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 621{ 622 623 mdoc_node_unlink(mdoc, p); 624 return(node_append(mdoc, p)); 625} 626 627#if 0 628/* 629 * Pre-treat a text line. 630 * Text lines can consist of equations, which must be handled apart from 631 * the regular text. 632 * Thus, use this function to step through a line checking if it has any 633 * equations embedded in it. 634 * This must handle multiple equations AND equations that do not end at 635 * the end-of-line, i.e., will re-enter in the next roff parse. 636 */ 637static int 638mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 639{ 640 char *start, *end; 641 char delim; 642 643 while ('\0' != buf[offs]) { 644 /* Mark starting position if eqn is set. */ 645 start = NULL; 646 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 647 if (NULL != (start = strchr(buf + offs, delim))) 648 *start++ = '\0'; 649 650 /* Parse text as normal. */ 651 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 652 return(0); 653 654 /* Continue only if an equation exists. */ 655 if (NULL == start) 656 break; 657 658 /* Read past the end of the equation. */ 659 offs += start - (buf + offs); 660 assert(start == &buf[offs]); 661 if (NULL != (end = strchr(buf + offs, delim))) { 662 *end++ = '\0'; 663 while (' ' == *end) 664 end++; 665 } 666 667 /* Parse the equation itself. */ 668 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 669 670 /* Process a finished equation? 
*/ 671 if (roff_closeeqn(mdoc->roff)) 672 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 673 return(0); 674 offs += (end - (buf + offs)); 675 } 676 677 return(1); 678} 679#endif 680 681/* 682 * Parse free-form text, that is, a line that does not begin with the 683 * control character. 684 */ 685static int 686mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 687{ 688 char *c, *ws, *end; 689 struct mdoc_node *n; 690 691 assert(mdoc->last); 692 n = mdoc->last; 693 694 /* 695 * Divert directly to list processing if we're encountering a 696 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 697 * (a MDOC_BODY means it's already open, in which case we should 698 * process within its context in the normal way). 699 */ 700 701 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 702 LIST_column == n->norm->Bl.type) { 703 /* `Bl' is open without any children. */ 704 mdoc->flags |= MDOC_FREECOL; 705 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 706 } 707 708 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 709 NULL != n->parent && 710 MDOC_Bl == n->parent->tok && 711 LIST_column == n->parent->norm->Bl.type) { 712 /* `Bl' has block-level `It' children. */ 713 mdoc->flags |= MDOC_FREECOL; 714 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 715 } 716 717 /* 718 * Search for the beginning of unescaped trailing whitespace (ws) 719 * and for the first character not to be output (end). 720 */ 721 722 /* FIXME: replace with strcspn(). */ 723 ws = NULL; 724 for (c = end = buf + offs; *c; c++) { 725 switch (*c) { 726 case ' ': 727 if (NULL == ws) 728 ws = c; 729 continue; 730 case '\t': 731 /* 732 * Always warn about trailing tabs, 733 * even outside literal context, 734 * where they should be put on the next line. 735 */ 736 if (NULL == ws) 737 ws = c; 738 /* 739 * Strip trailing tabs in literal context only; 740 * outside, they affect the next line. 
741 */ 742 if (MDOC_LITERAL & mdoc->flags) 743 continue; 744 break; 745 case '\\': 746 /* Skip the escaped character, too, if any. */ 747 if (c[1]) 748 c++; 749 /* FALLTHROUGH */ 750 default: 751 ws = NULL; 752 break; 753 } 754 end = c + 1; 755 } 756 *end = '\0'; 757 758 if (ws) 759 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 760 line, (int)(ws-buf), NULL); 761 762 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 763 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 764 line, (int)(c - buf), NULL); 765 766 /* 767 * Insert a `sp' in the case of a blank line. Technically, 768 * blank lines aren't allowed, but enough manuals assume this 769 * behaviour that we want to work around it. 770 */ 771 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 772 return(0); 773 774 mdoc->next = MDOC_NEXT_SIBLING; 775 776 return(mdoc_valid_post(mdoc)); 777 } 778 779 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 780 return(0); 781 782 if (MDOC_LITERAL & mdoc->flags) 783 return(1); 784 785 /* 786 * End-of-sentence check. If the last character is an unescaped 787 * EOS character, then flag the node as being the end of a 788 * sentence. The front-end will know how to interpret this. 789 */ 790 791 assert(buf < end); 792 793 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 794 mdoc->last->flags |= MDOC_EOS; 795 796 return(1); 797} 798 799/* 800 * Parse a macro line, that is, a line beginning with the control 801 * character. 802 */ 803static int 804mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 805{ 806 enum mdoct tok; 807 int i, sv; 808 char mac[5]; 809 struct mdoc_node *n; 810 811 /* Empty post-control lines are ignored. */ 812 813 if ('"' == buf[offs]) { 814 mandoc_msg(MANDOCERR_COMMENT_BAD, mdoc->parse, 815 ln, offs, NULL); 816 return(1); 817 } else if ('\0' == buf[offs]) 818 return(1); 819 820 sv = offs; 821 822 /* 823 * Copy the first word into a nil-terminated buffer. 824 * Stop copying when a tab, space, or eoln is encountered. 
825 */ 826 827 i = 0; 828 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] && 829 '\t' != buf[offs]) 830 mac[i++] = buf[offs++]; 831 832 mac[i] = '\0'; 833 834 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 835 836 if (MDOC_MAX == tok) { 837 mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 838 ln, sv, "%s", buf + sv - 1); 839 return(1); 840 } 841 842 /* Disregard the first trailing tab, if applicable. */ 843 844 if ('\t' == buf[offs]) 845 offs++; 846 847 /* Jump to the next non-whitespace word. */ 848 849 while (buf[offs] && ' ' == buf[offs]) 850 offs++; 851 852 /* 853 * Trailing whitespace. Note that tabs are allowed to be passed 854 * into the parser as "text", so we only warn about spaces here. 855 */ 856 857 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 858 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 859 ln, offs - 1, NULL); 860 861 /* 862 * If an initial macro or a list invocation, divert directly 863 * into macro processing. 864 */ 865 866 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) 867 return(mdoc_macro(mdoc, tok, ln, sv, &offs, buf)); 868 869 n = mdoc->last; 870 assert(mdoc->last); 871 872 /* 873 * If the first macro of a `Bl -column', open an `It' block 874 * context around the parsed macro. 875 */ 876 877 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 878 LIST_column == n->norm->Bl.type) { 879 mdoc->flags |= MDOC_FREECOL; 880 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); 881 } 882 883 /* 884 * If we're following a block-level `It' within a `Bl -column' 885 * context (perhaps opened in the above block or in ptext()), 886 * then open an `It' block context around the parsed macro. 887 */ 888 889 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 890 NULL != n->parent && 891 MDOC_Bl == n->parent->tok && 892 LIST_column == n->parent->norm->Bl.type) { 893 mdoc->flags |= MDOC_FREECOL; 894 return(mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)); 895 } 896 897 /* Normal processing of a macro. */ 898 899 if ( ! 
mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 900 return(0); 901 902 /* In quick mode (for mandocdb), abort after the NAME section. */ 903 904 if (mdoc->quick && MDOC_Sh == tok && 905 SEC_NAME != mdoc->last->sec) 906 return(2); 907 908 return(1); 909} 910 911enum mdelim 912mdoc_isdelim(const char *p) 913{ 914 915 if ('\0' == p[0]) 916 return(DELIM_NONE); 917 918 if ('\0' == p[1]) 919 switch (p[0]) { 920 case '(': 921 /* FALLTHROUGH */ 922 case '[': 923 return(DELIM_OPEN); 924 case '|': 925 return(DELIM_MIDDLE); 926 case '.': 927 /* FALLTHROUGH */ 928 case ',': 929 /* FALLTHROUGH */ 930 case ';': 931 /* FALLTHROUGH */ 932 case ':': 933 /* FALLTHROUGH */ 934 case '?': 935 /* FALLTHROUGH */ 936 case '!': 937 /* FALLTHROUGH */ 938 case ')': 939 /* FALLTHROUGH */ 940 case ']': 941 return(DELIM_CLOSE); 942 default: 943 return(DELIM_NONE); 944 } 945 946 if ('\\' != p[0]) 947 return(DELIM_NONE); 948 949 if (0 == strcmp(p + 1, ".")) 950 return(DELIM_CLOSE); 951 if (0 == strcmp(p + 1, "fR|\\fP")) 952 return(DELIM_MIDDLE); 953 954 return(DELIM_NONE); 955} 956 957void 958mdoc_deroff(char **dest, const struct mdoc_node *n) 959{ 960 char *cp; 961 size_t sz; 962 963 if (MDOC_TEXT != n->type) { 964 for (n = n->child; n; n = n->next) 965 mdoc_deroff(dest, n); 966 return; 967 } 968 969 /* Skip leading whitespace. */ 970 971 for (cp = n->string; '\0' != *cp; cp++) 972 if (0 == isspace((unsigned char)*cp)) 973 break; 974 975 /* Skip trailing whitespace. */ 976 977 for (sz = strlen(cp); sz; sz--) 978 if (0 == isspace((unsigned char)cp[sz-1])) 979 break; 980 981 /* Skip empty strings. */ 982 983 if (0 == sz) 984 return; 985 986 if (NULL == *dest) { 987 *dest = mandoc_strndup(cp, sz); 988 return; 989 } 990 991 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 992 free(*dest); 993 *dest = cp; 994} 995