/* mdoc.c revision 1.103 */
1/* $Id: mdoc.c,v 1.103 2014/04/20 16:44:44 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/types.h> 19 20#include <assert.h> 21#include <ctype.h> 22#include <stdarg.h> 23#include <stdio.h> 24#include <stdlib.h> 25#include <string.h> 26#include <time.h> 27 28#include "mdoc.h" 29#include "mandoc.h" 30#include "mandoc_aux.h" 31#include "libmdoc.h" 32#include "libmandoc.h" 33 34const char *const __mdoc_macronames[MDOC_MAX] = { 35 "Ap", "Dd", "Dt", "Os", 36 "Sh", "Ss", "Pp", "D1", 37 "Dl", "Bd", "Ed", "Bl", 38 "El", "It", "Ad", "An", 39 "Ar", "Cd", "Cm", "Dv", 40 "Er", "Ev", "Ex", "Fa", 41 "Fd", "Fl", "Fn", "Ft", 42 "Ic", "In", "Li", "Nd", 43 "Nm", "Op", "Ot", "Pa", 44 "Rv", "St", "Va", "Vt", 45 "Xr", "%A", "%B", "%D", 46 "%I", "%J", "%N", "%O", 47 "%P", "%R", "%T", "%V", 48 "Ac", "Ao", "Aq", "At", 49 "Bc", "Bf", "Bo", "Bq", 50 "Bsx", "Bx", "Db", "Dc", 51 "Do", "Dq", "Ec", "Ef", 52 "Em", "Eo", "Fx", "Ms", 53 "No", "Ns", "Nx", "Ox", 54 "Pc", "Pf", "Po", "Pq", 55 "Qc", "Ql", "Qo", "Qq", 56 "Re", "Rs", "Sc", "So", 57 "Sq", "Sm", "Sx", "Sy", 58 "Tn", "Ux", "Xc", "Xo", 59 "Fo", "Fc", "Oo", 
"Oc", 60 "Bk", "Ek", "Bt", "Hf", 61 "Fr", "Ud", "Lb", "Lp", 62 "Lk", "Mt", "Brq", "Bro", 63 "Brc", "%C", "Es", "En", 64 "Dx", "%Q", "br", "sp", 65 "%U", "Ta", "ll", 66 }; 67 68const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 69 "split", "nosplit", "ragged", 70 "unfilled", "literal", "file", 71 "offset", "bullet", "dash", 72 "hyphen", "item", "enum", 73 "tag", "diag", "hang", 74 "ohang", "inset", "column", 75 "width", "compact", "std", 76 "filled", "words", "emphasis", 77 "symbolic", "nested", "centered" 78 }; 79 80const char * const *mdoc_macronames = __mdoc_macronames; 81const char * const *mdoc_argnames = __mdoc_argnames; 82 83static void mdoc_node_free(struct mdoc_node *); 84static void mdoc_node_unlink(struct mdoc *, 85 struct mdoc_node *); 86static void mdoc_free1(struct mdoc *); 87static void mdoc_alloc1(struct mdoc *); 88static struct mdoc_node *node_alloc(struct mdoc *, int, int, 89 enum mdoct, enum mdoc_type); 90static int node_append(struct mdoc *, 91 struct mdoc_node *); 92#if 0 93static int mdoc_preptext(struct mdoc *, int, char *, int); 94#endif 95static int mdoc_ptext(struct mdoc *, int, char *, int); 96static int mdoc_pmacro(struct mdoc *, int, char *, int); 97 98 99const struct mdoc_node * 100mdoc_node(const struct mdoc *mdoc) 101{ 102 103 assert( ! (MDOC_HALT & mdoc->flags)); 104 return(mdoc->first); 105} 106 107const struct mdoc_meta * 108mdoc_meta(const struct mdoc *mdoc) 109{ 110 111 assert( ! (MDOC_HALT & mdoc->flags)); 112 return(&mdoc->meta); 113} 114 115/* 116 * Frees volatile resources (parse tree, meta-data, fields). 
117 */ 118static void 119mdoc_free1(struct mdoc *mdoc) 120{ 121 122 if (mdoc->first) 123 mdoc_node_delete(mdoc, mdoc->first); 124 if (mdoc->meta.title) 125 free(mdoc->meta.title); 126 if (mdoc->meta.os) 127 free(mdoc->meta.os); 128 if (mdoc->meta.name) 129 free(mdoc->meta.name); 130 if (mdoc->meta.arch) 131 free(mdoc->meta.arch); 132 if (mdoc->meta.vol) 133 free(mdoc->meta.vol); 134 if (mdoc->meta.msec) 135 free(mdoc->meta.msec); 136 if (mdoc->meta.date) 137 free(mdoc->meta.date); 138} 139 140/* 141 * Allocate all volatile resources (parse tree, meta-data, fields). 142 */ 143static void 144mdoc_alloc1(struct mdoc *mdoc) 145{ 146 147 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 148 mdoc->flags = 0; 149 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 150 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 151 mdoc->first = mdoc->last; 152 mdoc->last->type = MDOC_ROOT; 153 mdoc->last->tok = MDOC_MAX; 154 mdoc->next = MDOC_NEXT_CHILD; 155} 156 157/* 158 * Free up volatile resources (see mdoc_free1()) then re-initialises the 159 * data with mdoc_alloc1(). After invocation, parse data has been reset 160 * and the parser is ready for re-invocation on a new tree; however, 161 * cross-parse non-volatile data is kept intact. 162 */ 163void 164mdoc_reset(struct mdoc *mdoc) 165{ 166 167 mdoc_free1(mdoc); 168 mdoc_alloc1(mdoc); 169} 170 171/* 172 * Completely free up all volatile and non-volatile parse resources. 173 * After invocation, the pointer is no longer usable. 174 */ 175void 176mdoc_free(struct mdoc *mdoc) 177{ 178 179 mdoc_free1(mdoc); 180 free(mdoc); 181} 182 183/* 184 * Allocate volatile and non-volatile parse resources. 
185 */ 186struct mdoc * 187mdoc_alloc(struct roff *roff, struct mparse *parse, 188 char *defos, int quick) 189{ 190 struct mdoc *p; 191 192 p = mandoc_calloc(1, sizeof(struct mdoc)); 193 194 p->parse = parse; 195 p->defos = defos; 196 p->quick = quick; 197 p->roff = roff; 198 199 mdoc_hash_init(); 200 mdoc_alloc1(p); 201 return(p); 202} 203 204/* 205 * Climb back up the parse tree, validating open scopes. Mostly calls 206 * through to macro_end() in macro.c. 207 */ 208int 209mdoc_endparse(struct mdoc *mdoc) 210{ 211 212 assert( ! (MDOC_HALT & mdoc->flags)); 213 if (mdoc_macroend(mdoc)) 214 return(1); 215 mdoc->flags |= MDOC_HALT; 216 return(0); 217} 218 219int 220mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 221{ 222 struct mdoc_node *n; 223 224 assert( ! (MDOC_HALT & mdoc->flags)); 225 226 /* No text before an initial macro. */ 227 228 if (SEC_NONE == mdoc->lastnamed) { 229 mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT); 230 return(1); 231 } 232 233 n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 234 n->eqn = ep; 235 236 if ( ! node_append(mdoc, n)) 237 return(0); 238 239 mdoc->next = MDOC_NEXT_SIBLING; 240 return(1); 241} 242 243int 244mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 245{ 246 struct mdoc_node *n; 247 248 assert( ! (MDOC_HALT & mdoc->flags)); 249 250 /* No text before an initial macro. */ 251 252 if (SEC_NONE == mdoc->lastnamed) { 253 mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT); 254 return(1); 255 } 256 257 n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 258 n->span = sp; 259 260 if ( ! node_append(mdoc, n)) 261 return(0); 262 263 mdoc->next = MDOC_NEXT_SIBLING; 264 return(1); 265} 266 267/* 268 * Main parse routine. Parses a single line -- really just hands off to 269 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 270 */ 271int 272mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 273{ 274 275 assert( ! 
(MDOC_HALT & mdoc->flags)); 276 277 mdoc->flags |= MDOC_NEWLINE; 278 279 /* 280 * Let the roff nS register switch SYNOPSIS mode early, 281 * such that the parser knows at all times 282 * whether this mode is on or off. 283 * Note that this mode is also switched by the Sh macro. 284 */ 285 if (roff_getreg(mdoc->roff, "nS")) 286 mdoc->flags |= MDOC_SYNOPSIS; 287 else 288 mdoc->flags &= ~MDOC_SYNOPSIS; 289 290 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 291 mdoc_pmacro(mdoc, ln, buf, offs) : 292 mdoc_ptext(mdoc, ln, buf, offs)); 293} 294 295int 296mdoc_macro(MACRO_PROT_ARGS) 297{ 298 assert(tok < MDOC_MAX); 299 300 /* If we're in the body, deny prologue calls. */ 301 302 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 303 MDOC_PBODY & mdoc->flags) { 304 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY); 305 return(1); 306 } 307 308 /* If we're in the prologue, deny "body" macros. */ 309 310 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 311 ! (MDOC_PBODY & mdoc->flags)) { 312 mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG); 313 if (NULL == mdoc->meta.msec) 314 mdoc->meta.msec = mandoc_strdup("1"); 315 if (NULL == mdoc->meta.title) 316 mdoc->meta.title = mandoc_strdup("UNKNOWN"); 317 if (NULL == mdoc->meta.vol) 318 mdoc->meta.vol = mandoc_strdup("LOCAL"); 319 if (NULL == mdoc->meta.os) 320 mdoc->meta.os = mandoc_strdup("LOCAL"); 321 if (NULL == mdoc->meta.date) 322 mdoc->meta.date = mandoc_normdate 323 (mdoc->parse, NULL, line, ppos); 324 mdoc->flags |= MDOC_PBODY; 325 } 326 327 return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 328} 329 330 331static int 332node_append(struct mdoc *mdoc, struct mdoc_node *p) 333{ 334 335 assert(mdoc->last); 336 assert(mdoc->first); 337 assert(MDOC_ROOT != p->type); 338 339 switch (mdoc->next) { 340 case MDOC_NEXT_SIBLING: 341 mdoc->last->next = p; 342 p->prev = mdoc->last; 343 p->parent = mdoc->last->parent; 344 break; 345 case MDOC_NEXT_CHILD: 346 mdoc->last->child = p; 347 p->parent = mdoc->last; 348 break; 349 
default: 350 abort(); 351 /* NOTREACHED */ 352 } 353 354 p->parent->nchild++; 355 356 /* 357 * Copy over the normalised-data pointer of our parent. Not 358 * everybody has one, but copying a null pointer is fine. 359 */ 360 361 switch (p->type) { 362 case MDOC_BODY: 363 if (ENDBODY_NOT != p->end) 364 break; 365 /* FALLTHROUGH */ 366 case MDOC_TAIL: 367 /* FALLTHROUGH */ 368 case MDOC_HEAD: 369 p->norm = p->parent->norm; 370 break; 371 default: 372 break; 373 } 374 375 if ( ! mdoc_valid_pre(mdoc, p)) 376 return(0); 377 378 switch (p->type) { 379 case MDOC_HEAD: 380 assert(MDOC_BLOCK == p->parent->type); 381 p->parent->head = p; 382 break; 383 case MDOC_TAIL: 384 assert(MDOC_BLOCK == p->parent->type); 385 p->parent->tail = p; 386 break; 387 case MDOC_BODY: 388 if (p->end) 389 break; 390 assert(MDOC_BLOCK == p->parent->type); 391 p->parent->body = p; 392 break; 393 default: 394 break; 395 } 396 397 mdoc->last = p; 398 399 switch (p->type) { 400 case MDOC_TBL: 401 /* FALLTHROUGH */ 402 case MDOC_TEXT: 403 if ( ! mdoc_valid_post(mdoc)) 404 return(0); 405 break; 406 default: 407 break; 408 } 409 410 return(1); 411} 412 413static struct mdoc_node * 414node_alloc(struct mdoc *mdoc, int line, int pos, 415 enum mdoct tok, enum mdoc_type type) 416{ 417 struct mdoc_node *p; 418 419 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 420 p->sec = mdoc->lastsec; 421 p->line = line; 422 p->pos = pos; 423 p->lastline = line; 424 p->tok = tok; 425 p->type = type; 426 427 /* Flag analysis. */ 428 429 if (MDOC_SYNOPSIS & mdoc->flags) 430 p->flags |= MDOC_SYNPRETTY; 431 else 432 p->flags &= ~MDOC_SYNPRETTY; 433 if (MDOC_NEWLINE & mdoc->flags) 434 p->flags |= MDOC_LINE; 435 mdoc->flags &= ~MDOC_NEWLINE; 436 437 return(p); 438} 439 440int 441mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 442{ 443 struct mdoc_node *p; 444 445 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 446 if ( ! 
node_append(mdoc, p)) 447 return(0); 448 mdoc->next = MDOC_NEXT_CHILD; 449 return(1); 450} 451 452int 453mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 454{ 455 struct mdoc_node *p; 456 457 assert(mdoc->first); 458 assert(mdoc->last); 459 460 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 461 if ( ! node_append(mdoc, p)) 462 return(0); 463 mdoc->next = MDOC_NEXT_CHILD; 464 return(1); 465} 466 467int 468mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 469{ 470 struct mdoc_node *p; 471 472 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 473 if ( ! node_append(mdoc, p)) 474 return(0); 475 mdoc->next = MDOC_NEXT_CHILD; 476 return(1); 477} 478 479int 480mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 481 struct mdoc_node *body, enum mdoc_endbody end) 482{ 483 struct mdoc_node *p; 484 485 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 486 p->pending = body; 487 p->norm = body->norm; 488 p->end = end; 489 if ( ! node_append(mdoc, p)) 490 return(0); 491 mdoc->next = MDOC_NEXT_SIBLING; 492 return(1); 493} 494 495int 496mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 497 enum mdoct tok, struct mdoc_arg *args) 498{ 499 struct mdoc_node *p; 500 501 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 502 p->args = args; 503 if (p->args) 504 (args->refcnt)++; 505 506 switch (tok) { 507 case MDOC_Bd: 508 /* FALLTHROUGH */ 509 case MDOC_Bf: 510 /* FALLTHROUGH */ 511 case MDOC_Bl: 512 /* FALLTHROUGH */ 513 case MDOC_Rs: 514 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 515 break; 516 default: 517 break; 518 } 519 520 if ( ! 
node_append(mdoc, p)) 521 return(0); 522 mdoc->next = MDOC_NEXT_CHILD; 523 return(1); 524} 525 526int 527mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 528 enum mdoct tok, struct mdoc_arg *args) 529{ 530 struct mdoc_node *p; 531 532 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 533 p->args = args; 534 if (p->args) 535 (args->refcnt)++; 536 537 switch (tok) { 538 case MDOC_An: 539 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 540 break; 541 default: 542 break; 543 } 544 545 if ( ! node_append(mdoc, p)) 546 return(0); 547 mdoc->next = MDOC_NEXT_CHILD; 548 return(1); 549} 550 551int 552mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 553{ 554 struct mdoc_node *n; 555 556 n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 557 n->string = roff_strdup(mdoc->roff, p); 558 559 if ( ! node_append(mdoc, n)) 560 return(0); 561 562 mdoc->next = MDOC_NEXT_SIBLING; 563 return(1); 564} 565 566void 567mdoc_word_append(struct mdoc *mdoc, const char *p) 568{ 569 struct mdoc_node *n; 570 char *addstr, *newstr; 571 572 n = mdoc->last; 573 addstr = roff_strdup(mdoc->roff, p); 574 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 575 free(addstr); 576 free(n->string); 577 n->string = newstr; 578 mdoc->next = MDOC_NEXT_SIBLING; 579} 580 581static void 582mdoc_node_free(struct mdoc_node *p) 583{ 584 585 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 586 free(p->norm); 587 if (p->string) 588 free(p->string); 589 if (p->args) 590 mdoc_argv_free(p->args); 591 free(p); 592} 593 594static void 595mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 596{ 597 598 /* Adjust siblings. */ 599 600 if (n->prev) 601 n->prev->next = n->next; 602 if (n->next) 603 n->next->prev = n->prev; 604 605 /* Adjust parent. */ 606 607 if (n->parent) { 608 n->parent->nchild--; 609 if (n->parent->child == n) 610 n->parent->child = n->prev ? n->prev : n->next; 611 if (n->parent->last == n) 612 n->parent->last = n->prev ? 
n->prev : NULL; 613 } 614 615 /* Adjust parse point, if applicable. */ 616 617 if (mdoc && mdoc->last == n) { 618 if (n->prev) { 619 mdoc->last = n->prev; 620 mdoc->next = MDOC_NEXT_SIBLING; 621 } else { 622 mdoc->last = n->parent; 623 mdoc->next = MDOC_NEXT_CHILD; 624 } 625 } 626 627 if (mdoc && mdoc->first == n) 628 mdoc->first = NULL; 629} 630 631void 632mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 633{ 634 635 while (p->child) { 636 assert(p->nchild); 637 mdoc_node_delete(mdoc, p->child); 638 } 639 assert(0 == p->nchild); 640 641 mdoc_node_unlink(mdoc, p); 642 mdoc_node_free(p); 643} 644 645int 646mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 647{ 648 649 mdoc_node_unlink(mdoc, p); 650 return(node_append(mdoc, p)); 651} 652 653#if 0 654/* 655 * Pre-treat a text line. 656 * Text lines can consist of equations, which must be handled apart from 657 * the regular text. 658 * Thus, use this function to step through a line checking if it has any 659 * equations embedded in it. 660 * This must handle multiple equations AND equations that do not end at 661 * the end-of-line, i.e., will re-enter in the next roff parse. 662 */ 663static int 664mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 665{ 666 char *start, *end; 667 char delim; 668 669 while ('\0' != buf[offs]) { 670 /* Mark starting position if eqn is set. */ 671 start = NULL; 672 if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 673 if (NULL != (start = strchr(buf + offs, delim))) 674 *start++ = '\0'; 675 676 /* Parse text as normal. */ 677 if ( ! mdoc_ptext(mdoc, line, buf, offs)) 678 return(0); 679 680 /* Continue only if an equation exists. */ 681 if (NULL == start) 682 break; 683 684 /* Read past the end of the equation. */ 685 offs += start - (buf + offs); 686 assert(start == &buf[offs]); 687 if (NULL != (end = strchr(buf + offs, delim))) { 688 *end++ = '\0'; 689 while (' ' == *end) 690 end++; 691 } 692 693 /* Parse the equation itself. 
*/ 694 roff_openeqn(mdoc->roff, NULL, line, offs, buf); 695 696 /* Process a finished equation? */ 697 if (roff_closeeqn(mdoc->roff)) 698 if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 699 return(0); 700 offs += (end - (buf + offs)); 701 } 702 703 return(1); 704} 705#endif 706 707/* 708 * Parse free-form text, that is, a line that does not begin with the 709 * control character. 710 */ 711static int 712mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 713{ 714 char *c, *ws, *end; 715 struct mdoc_node *n; 716 717 /* No text before an initial macro. */ 718 719 if (SEC_NONE == mdoc->lastnamed) { 720 mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT); 721 return(1); 722 } 723 724 assert(mdoc->last); 725 n = mdoc->last; 726 727 /* 728 * Divert directly to list processing if we're encountering a 729 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 730 * (a MDOC_BODY means it's already open, in which case we should 731 * process within its context in the normal way). 732 */ 733 734 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 735 LIST_column == n->norm->Bl.type) { 736 /* `Bl' is open without any children. */ 737 mdoc->flags |= MDOC_FREECOL; 738 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 739 } 740 741 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 742 NULL != n->parent && 743 MDOC_Bl == n->parent->tok && 744 LIST_column == n->parent->norm->Bl.type) { 745 /* `Bl' has block-level `It' children. */ 746 mdoc->flags |= MDOC_FREECOL; 747 return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 748 } 749 750 /* 751 * Search for the beginning of unescaped trailing whitespace (ws) 752 * and for the first character not to be output (end). 753 */ 754 755 /* FIXME: replace with strcspn(). 
*/ 756 ws = NULL; 757 for (c = end = buf + offs; *c; c++) { 758 switch (*c) { 759 case ' ': 760 if (NULL == ws) 761 ws = c; 762 continue; 763 case '\t': 764 /* 765 * Always warn about trailing tabs, 766 * even outside literal context, 767 * where they should be put on the next line. 768 */ 769 if (NULL == ws) 770 ws = c; 771 /* 772 * Strip trailing tabs in literal context only; 773 * outside, they affect the next line. 774 */ 775 if (MDOC_LITERAL & mdoc->flags) 776 continue; 777 break; 778 case '\\': 779 /* Skip the escaped character, too, if any. */ 780 if (c[1]) 781 c++; 782 /* FALLTHROUGH */ 783 default: 784 ws = NULL; 785 break; 786 } 787 end = c + 1; 788 } 789 *end = '\0'; 790 791 if (ws) 792 mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 793 794 if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 795 mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 796 797 /* 798 * Insert a `sp' in the case of a blank line. Technically, 799 * blank lines aren't allowed, but enough manuals assume this 800 * behaviour that we want to work around it. 801 */ 802 if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 803 return(0); 804 805 mdoc->next = MDOC_NEXT_SIBLING; 806 807 return(mdoc_valid_post(mdoc)); 808 } 809 810 if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 811 return(0); 812 813 if (MDOC_LITERAL & mdoc->flags) 814 return(1); 815 816 /* 817 * End-of-sentence check. If the last character is an unescaped 818 * EOS character, then flag the node as being the end of a 819 * sentence. The front-end will know how to interpret this. 820 */ 821 822 assert(buf < end); 823 824 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 825 mdoc->last->flags |= MDOC_EOS; 826 827 return(1); 828} 829 830/* 831 * Parse a macro line, that is, a line beginning with the control 832 * character. 
833 */ 834static int 835mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 836{ 837 enum mdoct tok; 838 int i, sv; 839 char mac[5]; 840 struct mdoc_node *n; 841 842 /* Empty post-control lines are ignored. */ 843 844 if ('"' == buf[offs]) { 845 mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT); 846 return(1); 847 } else if ('\0' == buf[offs]) 848 return(1); 849 850 sv = offs; 851 852 /* 853 * Copy the first word into a nil-terminated buffer. 854 * Stop copying when a tab, space, or eoln is encountered. 855 */ 856 857 i = 0; 858 while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] && 859 '\t' != buf[offs]) 860 mac[i++] = buf[offs++]; 861 862 mac[i] = '\0'; 863 864 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 865 866 if (MDOC_MAX == tok) { 867 mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 868 ln, sv, "%s", buf + sv - 1); 869 return(1); 870 } 871 872 /* Disregard the first trailing tab, if applicable. */ 873 874 if ('\t' == buf[offs]) 875 offs++; 876 877 /* Jump to the next non-whitespace word. */ 878 879 while (buf[offs] && ' ' == buf[offs]) 880 offs++; 881 882 /* 883 * Trailing whitespace. Note that tabs are allowed to be passed 884 * into the parser as "text", so we only warn about spaces here. 885 */ 886 887 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 888 mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE); 889 890 /* 891 * If an initial macro or a list invocation, divert directly 892 * into macro processing. 893 */ 894 895 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 896 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 897 goto err; 898 return(1); 899 } 900 901 n = mdoc->last; 902 assert(mdoc->last); 903 904 /* 905 * If the first macro of a `Bl -column', open an `It' block 906 * context around the parsed macro. 907 */ 908 909 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 910 LIST_column == n->norm->Bl.type) { 911 mdoc->flags |= MDOC_FREECOL; 912 if ( ! 
mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 913 goto err; 914 return(1); 915 } 916 917 /* 918 * If we're following a block-level `It' within a `Bl -column' 919 * context (perhaps opened in the above block or in ptext()), 920 * then open an `It' block context around the parsed macro. 921 */ 922 923 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 924 NULL != n->parent && 925 MDOC_Bl == n->parent->tok && 926 LIST_column == n->parent->norm->Bl.type) { 927 mdoc->flags |= MDOC_FREECOL; 928 if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 929 goto err; 930 return(1); 931 } 932 933 /* Normal processing of a macro. */ 934 935 if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 936 goto err; 937 938 /* In quick mode (for mandocdb), abort after the NAME section. */ 939 940 if (mdoc->quick && MDOC_Sh == tok && 941 SEC_NAME != mdoc->last->sec) 942 return(2); 943 944 return(1); 945 946err: /* Error out. */ 947 948 mdoc->flags |= MDOC_HALT; 949 return(0); 950} 951 952enum mdelim 953mdoc_isdelim(const char *p) 954{ 955 956 if ('\0' == p[0]) 957 return(DELIM_NONE); 958 959 if ('\0' == p[1]) 960 switch (p[0]) { 961 case '(': 962 /* FALLTHROUGH */ 963 case '[': 964 return(DELIM_OPEN); 965 case '|': 966 return(DELIM_MIDDLE); 967 case '.': 968 /* FALLTHROUGH */ 969 case ',': 970 /* FALLTHROUGH */ 971 case ';': 972 /* FALLTHROUGH */ 973 case ':': 974 /* FALLTHROUGH */ 975 case '?': 976 /* FALLTHROUGH */ 977 case '!': 978 /* FALLTHROUGH */ 979 case ')': 980 /* FALLTHROUGH */ 981 case ']': 982 return(DELIM_CLOSE); 983 default: 984 return(DELIM_NONE); 985 } 986 987 if ('\\' != p[0]) 988 return(DELIM_NONE); 989 990 if (0 == strcmp(p + 1, ".")) 991 return(DELIM_CLOSE); 992 if (0 == strcmp(p + 1, "fR|\\fP")) 993 return(DELIM_MIDDLE); 994 995 return(DELIM_NONE); 996} 997 998void 999mdoc_deroff(char **dest, const struct mdoc_node *n) 1000{ 1001 char *cp; 1002 size_t sz; 1003 1004 if (MDOC_TEXT != n->type) { 1005 for (n = n->child; n; n = n->next) 1006 mdoc_deroff(dest, n); 1007 return; 1008 
} 1009 1010 /* Skip leading whitespace. */ 1011 1012 for (cp = n->string; '\0' != *cp; cp++) 1013 if (0 == isspace((unsigned char)*cp)) 1014 break; 1015 1016 /* Skip trailing whitespace. */ 1017 1018 for (sz = strlen(cp); sz; sz--) 1019 if (0 == isspace((unsigned char)cp[sz-1])) 1020 break; 1021 1022 /* Skip empty strings. */ 1023 1024 if (0 == sz) 1025 return; 1026 1027 if (NULL == *dest) { 1028 *dest = mandoc_strndup(cp, sz); 1029 return; 1030 } 1031 1032 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1033 free(*dest); 1034 *dest = cp; 1035} 1036