mdoc.c revision 1.89
1/* $Id: mdoc.c,v 1.89 2012/07/16 09:51:03 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/types.h> 19 20#include <assert.h> 21#include <stdarg.h> 22#include <stdio.h> 23#include <stdlib.h> 24#include <string.h> 25#include <time.h> 26 27#include "mdoc.h" 28#include "mandoc.h" 29#include "libmdoc.h" 30#include "libmandoc.h" 31 32const char *const __mdoc_macronames[MDOC_MAX] = { 33 "Ap", "Dd", "Dt", "Os", 34 "Sh", "Ss", "Pp", "D1", 35 "Dl", "Bd", "Ed", "Bl", 36 "El", "It", "Ad", "An", 37 "Ar", "Cd", "Cm", "Dv", 38 "Er", "Ev", "Ex", "Fa", 39 "Fd", "Fl", "Fn", "Ft", 40 "Ic", "In", "Li", "Nd", 41 "Nm", "Op", "Ot", "Pa", 42 "Rv", "St", "Va", "Vt", 43 /* LINTED */ 44 "Xr", "%A", "%B", "%D", 45 /* LINTED */ 46 "%I", "%J", "%N", "%O", 47 /* LINTED */ 48 "%P", "%R", "%T", "%V", 49 "Ac", "Ao", "Aq", "At", 50 "Bc", "Bf", "Bo", "Bq", 51 "Bsx", "Bx", "Db", "Dc", 52 "Do", "Dq", "Ec", "Ef", 53 "Em", "Eo", "Fx", "Ms", 54 "No", "Ns", "Nx", "Ox", 55 "Pc", "Pf", "Po", "Pq", 56 "Qc", "Ql", "Qo", "Qq", 57 "Re", "Rs", "Sc", "So", 58 "Sq", "Sm", "Sx", "Sy", 59 "Tn", "Ux", "Xc", "Xo", 60 "Fo", "Fc", "Oo", "Oc", 61 "Bk", "Ek", "Bt", "Hf", 62 "Fr", "Ud", "Lb", "Lp", 63 "Lk", "Mt", "Brq", "Bro", 64 /* LINTED */ 65 "Brc", "%C", "Es", "En", 66 /* LINTED */ 67 "Dx", "%Q", "br", "sp", 68 /* LINTED */ 69 "%U", "Ta" 70 }; 71 72const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 73 "split", "nosplit", "ragged", 74 "unfilled", "literal", "file", 75 "offset", "bullet", "dash", 76 "hyphen", "item", "enum", 77 "tag", "diag", "hang", 78 "ohang", "inset", "column", 79 "width", "compact", "std", 80 "filled", "words", "emphasis", 81 "symbolic", "nested", "centered" 82 }; 83 84const char * const *mdoc_macronames = __mdoc_macronames; 85const char * const *mdoc_argnames = __mdoc_argnames; 86 87static void mdoc_node_free(struct mdoc_node *); 88static void mdoc_node_unlink(struct mdoc *, 89 struct mdoc_node *); 90static void mdoc_free1(struct mdoc *); 91static void mdoc_alloc1(struct mdoc *); 92static struct mdoc_node *node_alloc(struct mdoc *, int, int, 93 enum mdoct, enum mdoc_type); 94static int node_append(struct mdoc *, 95 struct mdoc_node *); 96#if 0 97static int mdoc_preptext(struct mdoc *, int, char *, int); 98#endif 99static int mdoc_ptext(struct mdoc *, int, char *, int); 100static int mdoc_pmacro(struct mdoc *, int, char *, int); 101 102const struct mdoc_node * 103mdoc_node(const struct mdoc *m) 104{ 105 106 assert( ! (MDOC_HALT & m->flags)); 107 return(m->first); 108} 109 110 111const struct mdoc_meta * 112mdoc_meta(const struct mdoc *m) 113{ 114 115 assert( ! (MDOC_HALT & m->flags)); 116 return(&m->meta); 117} 118 119 120/* 121 * Frees volatile resources (parse tree, meta-data, fields). 122 */ 123static void 124mdoc_free1(struct mdoc *mdoc) 125{ 126 127 if (mdoc->first) 128 mdoc_node_delete(mdoc, mdoc->first); 129 if (mdoc->meta.title) 130 free(mdoc->meta.title); 131 if (mdoc->meta.os) 132 free(mdoc->meta.os); 133 if (mdoc->meta.name) 134 free(mdoc->meta.name); 135 if (mdoc->meta.arch) 136 free(mdoc->meta.arch); 137 if (mdoc->meta.vol) 138 free(mdoc->meta.vol); 139 if (mdoc->meta.msec) 140 free(mdoc->meta.msec); 141 if (mdoc->meta.date) 142 free(mdoc->meta.date); 143} 144 145 146/* 147 * Allocate all volatile resources (parse tree, meta-data, fields). 148 */ 149static void 150mdoc_alloc1(struct mdoc *mdoc) 151{ 152 153 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 154 mdoc->flags = 0; 155 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 156 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 157 mdoc->first = mdoc->last; 158 mdoc->last->type = MDOC_ROOT; 159 mdoc->last->tok = MDOC_MAX; 160 mdoc->next = MDOC_NEXT_CHILD; 161} 162 163 164/* 165 * Free up volatile resources (see mdoc_free1()) then re-initialises the 166 * data with mdoc_alloc1(). After invocation, parse data has been reset 167 * and the parser is ready for re-invocation on a new tree; however, 168 * cross-parse non-volatile data is kept intact. 169 */ 170void 171mdoc_reset(struct mdoc *mdoc) 172{ 173 174 mdoc_free1(mdoc); 175 mdoc_alloc1(mdoc); 176} 177 178 179/* 180 * Completely free up all volatile and non-volatile parse resources. 181 * After invocation, the pointer is no longer usable. 182 */ 183void 184mdoc_free(struct mdoc *mdoc) 185{ 186 187 mdoc_free1(mdoc); 188 free(mdoc); 189} 190 191 192/* 193 * Allocate volatile and non-volatile parse resources. 194 */ 195struct mdoc * 196mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) 197{ 198 struct mdoc *p; 199 200 p = mandoc_calloc(1, sizeof(struct mdoc)); 201 202 p->parse = parse; 203 p->defos = defos; 204 p->roff = roff; 205 206 mdoc_hash_init(); 207 mdoc_alloc1(p); 208 return(p); 209} 210 211 212/* 213 * Climb back up the parse tree, validating open scopes. Mostly calls 214 * through to macro_end() in macro.c. 215 */ 216int 217mdoc_endparse(struct mdoc *m) 218{ 219 220 assert( ! (MDOC_HALT & m->flags)); 221 if (mdoc_macroend(m)) 222 return(1); 223 m->flags |= MDOC_HALT; 224 return(0); 225} 226 227int 228mdoc_addeqn(struct mdoc *m, const struct eqn *ep) 229{ 230 struct mdoc_node *n; 231 232 assert( ! (MDOC_HALT & m->flags)); 233 234 /* No text before an initial macro. */ 235 236 if (SEC_NONE == m->lastnamed) { 237 mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); 238 return(1); 239 } 240 241 n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 242 n->eqn = ep; 243 244 if ( ! node_append(m, n)) 245 return(0); 246 247 m->next = MDOC_NEXT_SIBLING; 248 return(1); 249} 250 251int 252mdoc_addspan(struct mdoc *m, const struct tbl_span *sp) 253{ 254 struct mdoc_node *n; 255 256 assert( ! (MDOC_HALT & m->flags)); 257 258 /* No text before an initial macro. */ 259 260 if (SEC_NONE == m->lastnamed) { 261 mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT); 262 return(1); 263 } 264 265 n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL); 266 n->span = sp; 267 268 if ( ! node_append(m, n)) 269 return(0); 270 271 m->next = MDOC_NEXT_SIBLING; 272 return(1); 273} 274 275 276/* 277 * Main parse routine. Parses a single line -- really just hands off to 278 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 279 */ 280int 281mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) 282{ 283 284 assert( ! (MDOC_HALT & m->flags)); 285 286 m->flags |= MDOC_NEWLINE; 287 288 /* 289 * Let the roff nS register switch SYNOPSIS mode early, 290 * such that the parser knows at all times 291 * whether this mode is on or off. 292 * Note that this mode is also switched by the Sh macro. 293 */ 294 if (roff_regisset(m->roff, REG_nS)) { 295 if (roff_regget(m->roff, REG_nS)) 296 m->flags |= MDOC_SYNOPSIS; 297 else 298 m->flags &= ~MDOC_SYNOPSIS; 299 } 300 301 return(roff_getcontrol(m->roff, buf, &offs) ? 302 mdoc_pmacro(m, ln, buf, offs) : 303 mdoc_ptext(m, ln, buf, offs)); 304} 305 306int 307mdoc_macro(MACRO_PROT_ARGS) 308{ 309 assert(tok < MDOC_MAX); 310 311 /* If we're in the body, deny prologue calls. */ 312 313 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 314 MDOC_PBODY & m->flags) { 315 mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY); 316 return(1); 317 } 318 319 /* If we're in the prologue, deny "body" macros. */ 320 321 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 322 ! (MDOC_PBODY & m->flags)) { 323 mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG); 324 if (NULL == m->meta.msec) 325 m->meta.msec = mandoc_strdup("1"); 326 if (NULL == m->meta.title) 327 m->meta.title = mandoc_strdup("UNKNOWN"); 328 if (NULL == m->meta.vol) 329 m->meta.vol = mandoc_strdup("LOCAL"); 330 if (NULL == m->meta.os) 331 m->meta.os = mandoc_strdup("LOCAL"); 332 if (NULL == m->meta.date) 333 m->meta.date = mandoc_normdate 334 (m->parse, NULL, line, ppos); 335 m->flags |= MDOC_PBODY; 336 } 337 338 return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf)); 339} 340 341 342static int 343node_append(struct mdoc *mdoc, struct mdoc_node *p) 344{ 345 346 assert(mdoc->last); 347 assert(mdoc->first); 348 assert(MDOC_ROOT != p->type); 349 350 switch (mdoc->next) { 351 case (MDOC_NEXT_SIBLING): 352 mdoc->last->next = p; 353 p->prev = mdoc->last; 354 p->parent = mdoc->last->parent; 355 break; 356 case (MDOC_NEXT_CHILD): 357 mdoc->last->child = p; 358 p->parent = mdoc->last; 359 break; 360 default: 361 abort(); 362 /* NOTREACHED */ 363 } 364 365 p->parent->nchild++; 366 367 /* 368 * Copy over the normalised-data pointer of our parent. Not 369 * everybody has one, but copying a null pointer is fine. 370 */ 371 372 switch (p->type) { 373 case (MDOC_BODY): 374 /* FALLTHROUGH */ 375 case (MDOC_TAIL): 376 /* FALLTHROUGH */ 377 case (MDOC_HEAD): 378 p->norm = p->parent->norm; 379 break; 380 default: 381 break; 382 } 383 384 if ( ! mdoc_valid_pre(mdoc, p)) 385 return(0); 386 387 switch (p->type) { 388 case (MDOC_HEAD): 389 assert(MDOC_BLOCK == p->parent->type); 390 p->parent->head = p; 391 break; 392 case (MDOC_TAIL): 393 assert(MDOC_BLOCK == p->parent->type); 394 p->parent->tail = p; 395 break; 396 case (MDOC_BODY): 397 if (p->end) 398 break; 399 assert(MDOC_BLOCK == p->parent->type); 400 p->parent->body = p; 401 break; 402 default: 403 break; 404 } 405 406 mdoc->last = p; 407 408 switch (p->type) { 409 case (MDOC_TBL): 410 /* FALLTHROUGH */ 411 case (MDOC_TEXT): 412 if ( ! mdoc_valid_post(mdoc)) 413 return(0); 414 break; 415 default: 416 break; 417 } 418 419 return(1); 420} 421 422 423static struct mdoc_node * 424node_alloc(struct mdoc *m, int line, int pos, 425 enum mdoct tok, enum mdoc_type type) 426{ 427 struct mdoc_node *p; 428 429 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 430 p->sec = m->lastsec; 431 p->line = line; 432 p->pos = pos; 433 p->tok = tok; 434 p->type = type; 435 436 /* Flag analysis. */ 437 438 if (MDOC_SYNOPSIS & m->flags) 439 p->flags |= MDOC_SYNPRETTY; 440 else 441 p->flags &= ~MDOC_SYNPRETTY; 442 if (MDOC_NEWLINE & m->flags) 443 p->flags |= MDOC_LINE; 444 m->flags &= ~MDOC_NEWLINE; 445 446 return(p); 447} 448 449 450int 451mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 452{ 453 struct mdoc_node *p; 454 455 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 456 if ( ! node_append(m, p)) 457 return(0); 458 m->next = MDOC_NEXT_CHILD; 459 return(1); 460} 461 462 463int 464mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 465{ 466 struct mdoc_node *p; 467 468 assert(m->first); 469 assert(m->last); 470 471 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 472 if ( ! node_append(m, p)) 473 return(0); 474 m->next = MDOC_NEXT_CHILD; 475 return(1); 476} 477 478 479int 480mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 481{ 482 struct mdoc_node *p; 483 484 p = node_alloc(m, line, pos, tok, MDOC_BODY); 485 if ( ! node_append(m, p)) 486 return(0); 487 m->next = MDOC_NEXT_CHILD; 488 return(1); 489} 490 491 492int 493mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, 494 struct mdoc_node *body, enum mdoc_endbody end) 495{ 496 struct mdoc_node *p; 497 498 p = node_alloc(m, line, pos, tok, MDOC_BODY); 499 p->pending = body; 500 p->end = end; 501 if ( ! node_append(m, p)) 502 return(0); 503 m->next = MDOC_NEXT_SIBLING; 504 return(1); 505} 506 507 508int 509mdoc_block_alloc(struct mdoc *m, int line, int pos, 510 enum mdoct tok, struct mdoc_arg *args) 511{ 512 struct mdoc_node *p; 513 514 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 515 p->args = args; 516 if (p->args) 517 (args->refcnt)++; 518 519 switch (tok) { 520 case (MDOC_Bd): 521 /* FALLTHROUGH */ 522 case (MDOC_Bf): 523 /* FALLTHROUGH */ 524 case (MDOC_Bl): 525 /* FALLTHROUGH */ 526 case (MDOC_Rs): 527 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 528 break; 529 default: 530 break; 531 } 532 533 if ( ! node_append(m, p)) 534 return(0); 535 m->next = MDOC_NEXT_CHILD; 536 return(1); 537} 538 539 540int 541mdoc_elem_alloc(struct mdoc *m, int line, int pos, 542 enum mdoct tok, struct mdoc_arg *args) 543{ 544 struct mdoc_node *p; 545 546 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 547 p->args = args; 548 if (p->args) 549 (args->refcnt)++; 550 551 switch (tok) { 552 case (MDOC_An): 553 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 554 break; 555 default: 556 break; 557 } 558 559 if ( ! node_append(m, p)) 560 return(0); 561 m->next = MDOC_NEXT_CHILD; 562 return(1); 563} 564 565int 566mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 567{ 568 struct mdoc_node *n; 569 570 n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); 571 n->string = roff_strdup(m->roff, p); 572 573 if ( ! node_append(m, n)) 574 return(0); 575 576 m->next = MDOC_NEXT_SIBLING; 577 return(1); 578} 579 580 581static void 582mdoc_node_free(struct mdoc_node *p) 583{ 584 585 if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 586 free(p->norm); 587 if (p->string) 588 free(p->string); 589 if (p->args) 590 mdoc_argv_free(p->args); 591 free(p); 592} 593 594 595static void 596mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) 597{ 598 599 /* Adjust siblings. */ 600 601 if (n->prev) 602 n->prev->next = n->next; 603 if (n->next) 604 n->next->prev = n->prev; 605 606 /* Adjust parent. */ 607 608 if (n->parent) { 609 n->parent->nchild--; 610 if (n->parent->child == n) 611 n->parent->child = n->prev ? n->prev : n->next; 612 if (n->parent->last == n) 613 n->parent->last = n->prev ? n->prev : NULL; 614 } 615 616 /* Adjust parse point, if applicable. */ 617 618 if (m && m->last == n) { 619 if (n->prev) { 620 m->last = n->prev; 621 m->next = MDOC_NEXT_SIBLING; 622 } else { 623 m->last = n->parent; 624 m->next = MDOC_NEXT_CHILD; 625 } 626 } 627 628 if (m && m->first == n) 629 m->first = NULL; 630} 631 632 633void 634mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) 635{ 636 637 while (p->child) { 638 assert(p->nchild); 639 mdoc_node_delete(m, p->child); 640 } 641 assert(0 == p->nchild); 642 643 mdoc_node_unlink(m, p); 644 mdoc_node_free(p); 645} 646 647#if 0 648/* 649 * Pre-treat a text line. 650 * Text lines can consist of equations, which must be handled apart from 651 * the regular text. 652 * Thus, use this function to step through a line checking if it has any 653 * equations embedded in it. 654 * This must handle multiple equations AND equations that do not end at 655 * the end-of-line, i.e., will re-enter in the next roff parse. 656 */ 657static int 658mdoc_preptext(struct mdoc *m, int line, char *buf, int offs) 659{ 660 char *start, *end; 661 char delim; 662 663 while ('\0' != buf[offs]) { 664 /* Mark starting position if eqn is set. */ 665 start = NULL; 666 if ('\0' != (delim = roff_eqndelim(m->roff))) 667 if (NULL != (start = strchr(buf + offs, delim))) 668 *start++ = '\0'; 669 670 /* Parse text as normal. */ 671 if ( ! mdoc_ptext(m, line, buf, offs)) 672 return(0); 673 674 /* Continue only if an equation exists. */ 675 if (NULL == start) 676 break; 677 678 /* Read past the end of the equation. */ 679 offs += start - (buf + offs); 680 assert(start == &buf[offs]); 681 if (NULL != (end = strchr(buf + offs, delim))) { 682 *end++ = '\0'; 683 while (' ' == *end) 684 end++; 685 } 686 687 /* Parse the equation itself. */ 688 roff_openeqn(m->roff, NULL, line, offs, buf); 689 690 /* Process a finished equation? */ 691 if (roff_closeeqn(m->roff)) 692 if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) 693 return(0); 694 offs += (end - (buf + offs)); 695 } 696 697 return(1); 698} 699#endif 700 701/* 702 * Parse free-form text, that is, a line that does not begin with the 703 * control character. 704 */ 705static int 706mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) 707{ 708 char *c, *ws, *end; 709 struct mdoc_node *n; 710 711 /* No text before an initial macro. */ 712 713 if (SEC_NONE == m->lastnamed) { 714 mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT); 715 return(1); 716 } 717 718 assert(m->last); 719 n = m->last; 720 721 /* 722 * Divert directly to list processing if we're encountering a 723 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 724 * (a MDOC_BODY means it's already open, in which case we should 725 * process within its context in the normal way). 726 */ 727 728 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 729 LIST_column == n->norm->Bl.type) { 730 /* `Bl' is open without any children. */ 731 m->flags |= MDOC_FREECOL; 732 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 733 } 734 735 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 736 NULL != n->parent && 737 MDOC_Bl == n->parent->tok && 738 LIST_column == n->parent->norm->Bl.type) { 739 /* `Bl' has block-level `It' children. */ 740 m->flags |= MDOC_FREECOL; 741 return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 742 } 743 744 /* 745 * Search for the beginning of unescaped trailing whitespace (ws) 746 * and for the first character not to be output (end). 747 */ 748 749 /* FIXME: replace with strcspn(). */ 750 ws = NULL; 751 for (c = end = buf + offs; *c; c++) { 752 switch (*c) { 753 case ' ': 754 if (NULL == ws) 755 ws = c; 756 continue; 757 case '\t': 758 /* 759 * Always warn about trailing tabs, 760 * even outside literal context, 761 * where they should be put on the next line. 762 */ 763 if (NULL == ws) 764 ws = c; 765 /* 766 * Strip trailing tabs in literal context only; 767 * outside, they affect the next line. 768 */ 769 if (MDOC_LITERAL & m->flags) 770 continue; 771 break; 772 case '\\': 773 /* Skip the escaped character, too, if any. */ 774 if (c[1]) 775 c++; 776 /* FALLTHROUGH */ 777 default: 778 ws = NULL; 779 break; 780 } 781 end = c + 1; 782 } 783 *end = '\0'; 784 785 if (ws) 786 mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 787 788 if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { 789 mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 790 791 /* 792 * Insert a `sp' in the case of a blank line. Technically, 793 * blank lines aren't allowed, but enough manuals assume this 794 * behaviour that we want to work around it. 795 */ 796 if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL)) 797 return(0); 798 799 m->next = MDOC_NEXT_SIBLING; 800 801 return(mdoc_valid_post(m)); 802 } 803 804 if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) 805 return(0); 806 807 if (MDOC_LITERAL & m->flags) 808 return(1); 809 810 /* 811 * End-of-sentence check. If the last character is an unescaped 812 * EOS character, then flag the node as being the end of a 813 * sentence. The front-end will know how to interpret this. 814 */ 815 816 assert(buf < end); 817 818 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 819 m->last->flags |= MDOC_EOS; 820 821 return(1); 822} 823 824 825/* 826 * Parse a macro line, that is, a line beginning with the control 827 * character. 828 */ 829static int 830mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) 831{ 832 enum mdoct tok; 833 int i, sv; 834 char mac[5]; 835 struct mdoc_node *n; 836 837 /* Empty post-control lines are ignored. */ 838 839 if ('"' == buf[offs]) { 840 mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); 841 return(1); 842 } else if ('\0' == buf[offs]) 843 return(1); 844 845 sv = offs; 846 847 /* 848 * Copy the first word into a nil-terminated buffer. 849 * Stop copying when a tab, space, or eoln is encountered. 850 */ 851 852 i = 0; 853 while (i < 4 && '\0' != buf[offs] && 854 ' ' != buf[offs] && '\t' != buf[offs]) 855 mac[i++] = buf[offs++]; 856 857 mac[i] = '\0'; 858 859 tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 860 861 if (MDOC_MAX == tok) { 862 mandoc_vmsg(MANDOCERR_MACRO, m->parse, 863 ln, sv, "%s", buf + sv - 1); 864 return(1); 865 } 866 867 /* Disregard the first trailing tab, if applicable. */ 868 869 if ('\t' == buf[offs]) 870 offs++; 871 872 /* Jump to the next non-whitespace word. */ 873 874 while (buf[offs] && ' ' == buf[offs]) 875 offs++; 876 877 /* 878 * Trailing whitespace. Note that tabs are allowed to be passed 879 * into the parser as "text", so we only warn about spaces here. 880 */ 881 882 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 883 mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); 884 885 /* 886 * If an initial macro or a list invocation, divert directly 887 * into macro processing. 888 */ 889 890 if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { 891 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 892 goto err; 893 return(1); 894 } 895 896 n = m->last; 897 assert(m->last); 898 899 /* 900 * If the first macro of a `Bl -column', open an `It' block 901 * context around the parsed macro. 902 */ 903 904 if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 905 LIST_column == n->norm->Bl.type) { 906 m->flags |= MDOC_FREECOL; 907 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 908 goto err; 909 return(1); 910 } 911 912 /* 913 * If we're following a block-level `It' within a `Bl -column' 914 * context (perhaps opened in the above block or in ptext()), 915 * then open an `It' block context around the parsed macro. 916 */ 917 918 if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 919 NULL != n->parent && 920 MDOC_Bl == n->parent->tok && 921 LIST_column == n->parent->norm->Bl.type) { 922 m->flags |= MDOC_FREECOL; 923 if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 924 goto err; 925 return(1); 926 } 927 928 /* Normal processing of a macro. */ 929 930 if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 931 goto err; 932 933 return(1); 934 935err: /* Error out. */ 936 937 m->flags |= MDOC_HALT; 938 return(0); 939} 940 941enum mdelim 942mdoc_isdelim(const char *p) 943{ 944 945 if ('\0' == p[0]) 946 return(DELIM_NONE); 947 948 if ('\0' == p[1]) 949 switch (p[0]) { 950 case('('): 951 /* FALLTHROUGH */ 952 case('['): 953 return(DELIM_OPEN); 954 case('|'): 955 return(DELIM_MIDDLE); 956 case('.'): 957 /* FALLTHROUGH */ 958 case(','): 959 /* FALLTHROUGH */ 960 case(';'): 961 /* FALLTHROUGH */ 962 case(':'): 963 /* FALLTHROUGH */ 964 case('?'): 965 /* FALLTHROUGH */ 966 case('!'): 967 /* FALLTHROUGH */ 968 case(')'): 969 /* FALLTHROUGH */ 970 case(']'): 971 return(DELIM_CLOSE); 972 default: 973 return(DELIM_NONE); 974 } 975 976 if ('\\' != p[0]) 977 return(DELIM_NONE); 978 979 if (0 == strcmp(p + 1, ".")) 980 return(DELIM_CLOSE); 981 if (0 == strcmp(p + 1, "*(Ba")) 982 return(DELIM_MIDDLE); 983 984 return(DELIM_NONE); 985} 986