mdoc.c revision 1.1.1.5
1/* $Vendor-Id: mdoc.c,v 1.118 2010/03/31 07:42:04 kristaps Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#ifdef HAVE_CONFIG_H 18#include "config.h" 19#endif 20 21#include <sys/types.h> 22 23#include <assert.h> 24#include <ctype.h> 25#include <stdarg.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#include "libmdoc.h" 31#include "libmandoc.h" 32 33const char *const __mdoc_merrnames[MERRMAX] = { 34 "trailing whitespace", /* ETAILWS */ 35 "unexpected quoted parameter", /* EQUOTPARM */ 36 "unterminated quoted parameter", /* EQUOTTERM */ 37 "argument parameter suggested", /* EARGVAL */ 38 "macro disallowed in prologue", /* EBODYPROL */ 39 "macro disallowed in body", /* EPROLBODY */ 40 "text disallowed in prologue", /* ETEXTPROL */ 41 "blank line disallowed", /* ENOBLANK */ 42 "text parameter too long", /* ETOOLONG */ 43 "invalid escape sequence", /* EESCAPE */ 44 "invalid character", /* EPRINT */ 45 "document has no body", /* ENODAT */ 46 "document has no prologue", /* ENOPROLOGUE */ 47 "expected line arguments", /* ELINE */ 48 "invalid AT&T argument", /* EATT */ 49 "default name not yet set", /* ENAME */ 50 "missing list type", /* ELISTTYPE */ 51 "missing display type", /* EDISPTYPE */ 52 "too many display types", /* EMULTIDISP */ 53 "too many list types", /* EMULTILIST */ 54 "NAME section must be first", /* ESECNAME */ 55 "badly-formed NAME section", /* ENAMESECINC */ 56 "argument repeated", /* EARGREP */ 57 "expected boolean parameter", /* EBOOL */ 58 "inconsistent column syntax", /* ECOLMIS */ 59 "nested display invalid", /* ENESTDISP */ 60 "width argument missing", /* EMISSWIDTH */ 61 "invalid section for this manual section", /* EWRONGMSEC */ 62 "section out of conventional order", /* ESECOOO */ 63 "section repeated", /* ESECREP */ 64 "invalid standard argument", /* EBADSTAND */ 65 "multi-line arguments discouraged", /* ENOMULTILINE */ 66 "multi-line arguments suggested", /* EMULTILINE */ 67 "line arguments discouraged", /* ENOLINE */ 68 "prologue macro out of conventional order", /* EPROLOOO */ 69 "prologue macro repeated", /* EPROLREP */ 70 "invalid manual section", /* EBADMSEC */ 71 "invalid section", /* EBADSEC */ 72 "invalid font mode", /* EFONT */ 73 "invalid date syntax", /* EBADDATE */ 74 "invalid number format", /* ENUMFMT */ 75 "superfluous width argument", /* ENOWIDTH */ 76 "system: utsname error", /* EUTSNAME */ 77 "obsolete macro", /* EOBS */ 78 "end-of-line scope violation", /* EIMPBRK */ 79 "empty macro ignored", /* EIGNE */ 80 "unclosed explicit scope", /* EOPEN */ 81 "unterminated quoted phrase", /* EQUOTPHR */ 82 "closure macro without prior context", /* ENOCTX */ 83 "no description found for library", /* ELIB */ 84 "bad child for parent context", /* EBADCHILD */ 85 "list arguments preceding type", /* ENOTYPE */ 86}; 87 88const char *const __mdoc_macronames[MDOC_MAX] = { 89 "Ap", "Dd", "Dt", "Os", 90 "Sh", "Ss", "Pp", "D1", 91 "Dl", "Bd", "Ed", "Bl", 92 "El", "It", "Ad", "An", 93 "Ar", "Cd", "Cm", "Dv", 94 "Er", "Ev", "Ex", "Fa", 95 "Fd", "Fl", "Fn", "Ft", 96 "Ic", "In", "Li", "Nd", 97 "Nm", "Op", "Ot", "Pa", 98 "Rv", "St", "Va", "Vt", 99 /* LINTED */ 100 "Xr", "%A", "%B", "%D", 101 /* LINTED */ 102 "%I", "%J", "%N", "%O", 103 /* LINTED */ 104 "%P", "%R", "%T", "%V", 105 "Ac", "Ao", "Aq", "At", 106 "Bc", "Bf", "Bo", "Bq", 107 "Bsx", "Bx", "Db", "Dc", 108 "Do", "Dq", "Ec", "Ef", 109 "Em", "Eo", "Fx", "Ms", 110 "No", "Ns", "Nx", "Ox", 111 "Pc", "Pf", "Po", "Pq", 112 "Qc", "Ql", "Qo", "Qq", 113 "Re", "Rs", "Sc", "So", 114 "Sq", "Sm", "Sx", "Sy", 115 "Tn", "Ux", "Xc", "Xo", 116 "Fo", "Fc", "Oo", "Oc", 117 "Bk", "Ek", "Bt", "Hf", 118 "Fr", "Ud", "Lb", "Lp", 119 "Lk", "Mt", "Brq", "Bro", 120 /* LINTED */ 121 "Brc", "%C", "Es", "En", 122 /* LINTED */ 123 "Dx", "%Q", "br", "sp", 124 /* LINTED */ 125 "%U" 126 }; 127 128const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 129 "split", "nosplit", "ragged", 130 "unfilled", "literal", "file", 131 "offset", "bullet", "dash", 132 "hyphen", "item", "enum", 133 "tag", "diag", "hang", 134 "ohang", "inset", "column", 135 "width", "compact", "std", 136 "filled", "words", "emphasis", 137 "symbolic", "nested", "centered" 138 }; 139 140const char * const *mdoc_macronames = __mdoc_macronames; 141const char * const *mdoc_argnames = __mdoc_argnames; 142 143static void mdoc_free1(struct mdoc *); 144static void mdoc_alloc1(struct mdoc *); 145static struct mdoc_node *node_alloc(struct mdoc *, int, int, 146 enum mdoct, enum mdoc_type); 147static int node_append(struct mdoc *, 148 struct mdoc_node *); 149static int parsetext(struct mdoc *, int, char *); 150static int parsemacro(struct mdoc *, int, char *); 151static int macrowarn(struct mdoc *, int, const char *); 152static int pstring(struct mdoc *, int, int, 153 const char *, size_t); 154 155const struct mdoc_node * 156mdoc_node(const struct mdoc *m) 157{ 158 159 return(MDOC_HALT & m->flags ? NULL : m->first); 160} 161 162 163const struct mdoc_meta * 164mdoc_meta(const struct mdoc *m) 165{ 166 167 return(MDOC_HALT & m->flags ? NULL : &m->meta); 168} 169 170 171/* 172 * Frees volatile resources (parse tree, meta-data, fields). 173 */ 174static void 175mdoc_free1(struct mdoc *mdoc) 176{ 177 178 if (mdoc->first) 179 mdoc_node_freelist(mdoc->first); 180 if (mdoc->meta.title) 181 free(mdoc->meta.title); 182 if (mdoc->meta.os) 183 free(mdoc->meta.os); 184 if (mdoc->meta.name) 185 free(mdoc->meta.name); 186 if (mdoc->meta.arch) 187 free(mdoc->meta.arch); 188 if (mdoc->meta.vol) 189 free(mdoc->meta.vol); 190} 191 192 193/* 194 * Allocate all volatile resources (parse tree, meta-data, fields). 195 */ 196static void 197mdoc_alloc1(struct mdoc *mdoc) 198{ 199 200 memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 201 mdoc->flags = 0; 202 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 203 mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 204 mdoc->first = mdoc->last; 205 mdoc->last->type = MDOC_ROOT; 206 mdoc->next = MDOC_NEXT_CHILD; 207} 208 209 210/* 211 * Free up volatile resources (see mdoc_free1()) then re-initialises the 212 * data with mdoc_alloc1(). After invocation, parse data has been reset 213 * and the parser is ready for re-invocation on a new tree; however, 214 * cross-parse non-volatile data is kept intact. 215 */ 216void 217mdoc_reset(struct mdoc *mdoc) 218{ 219 220 mdoc_free1(mdoc); 221 mdoc_alloc1(mdoc); 222} 223 224 225/* 226 * Completely free up all volatile and non-volatile parse resources. 227 * After invocation, the pointer is no longer usable. 228 */ 229void 230mdoc_free(struct mdoc *mdoc) 231{ 232 233 mdoc_free1(mdoc); 234 free(mdoc); 235} 236 237 238/* 239 * Allocate volatile and non-volatile parse resources. 240 */ 241struct mdoc * 242mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 243{ 244 struct mdoc *p; 245 246 p = mandoc_calloc(1, sizeof(struct mdoc)); 247 248 if (cb) 249 memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 250 251 p->data = data; 252 p->pflags = pflags; 253 254 mdoc_hash_init(); 255 mdoc_alloc1(p); 256 return(p); 257} 258 259 260/* 261 * Climb back up the parse tree, validating open scopes. Mostly calls 262 * through to macro_end() in macro.c. 263 */ 264int 265mdoc_endparse(struct mdoc *m) 266{ 267 268 if (MDOC_HALT & m->flags) 269 return(0); 270 else if (mdoc_macroend(m)) 271 return(1); 272 m->flags |= MDOC_HALT; 273 return(0); 274} 275 276 277/* 278 * Main parse routine. Parses a single line -- really just hands off to 279 * the macro (parsemacro()) or text parser (parsetext()). 280 */ 281int 282mdoc_parseln(struct mdoc *m, int ln, char *buf) 283{ 284 285 if (MDOC_HALT & m->flags) 286 return(0); 287 288 return('.' == *buf ? parsemacro(m, ln, buf) : 289 parsetext(m, ln, buf)); 290} 291 292 293int 294mdoc_verr(struct mdoc *mdoc, int ln, int pos, 295 const char *fmt, ...) 296{ 297 char buf[256]; 298 va_list ap; 299 300 if (NULL == mdoc->cb.mdoc_err) 301 return(0); 302 303 va_start(ap, fmt); 304 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 305 va_end(ap); 306 307 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 308} 309 310 311int 312mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 313{ 314 char buf[256]; 315 va_list ap; 316 317 if (NULL == mdoc->cb.mdoc_warn) 318 return(0); 319 320 va_start(ap, fmt); 321 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 322 va_end(ap); 323 324 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 325} 326 327 328int 329mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 330{ 331 const char *p; 332 333 p = __mdoc_merrnames[(int)type]; 334 assert(p); 335 336 if (iserr) 337 return(mdoc_verr(m, line, pos, p)); 338 339 return(mdoc_vwarn(m, line, pos, p)); 340} 341 342 343int 344mdoc_macro(struct mdoc *m, enum mdoct tok, 345 int ln, int pp, int *pos, char *buf) 346{ 347 348 assert(tok < MDOC_MAX); 349 /* 350 * If we're in the prologue, deny "body" macros. Similarly, if 351 * we're in the body, deny prologue calls. 352 */ 353 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 354 MDOC_PBODY & m->flags) 355 return(mdoc_perr(m, ln, pp, EPROLBODY)); 356 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 357 ! (MDOC_PBODY & m->flags)) 358 return(mdoc_perr(m, ln, pp, EBODYPROL)); 359 360 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 361} 362 363 364static int 365node_append(struct mdoc *mdoc, struct mdoc_node *p) 366{ 367 368 assert(mdoc->last); 369 assert(mdoc->first); 370 assert(MDOC_ROOT != p->type); 371 372 switch (mdoc->next) { 373 case (MDOC_NEXT_SIBLING): 374 mdoc->last->next = p; 375 p->prev = mdoc->last; 376 p->parent = mdoc->last->parent; 377 break; 378 case (MDOC_NEXT_CHILD): 379 mdoc->last->child = p; 380 p->parent = mdoc->last; 381 break; 382 default: 383 abort(); 384 /* NOTREACHED */ 385 } 386 387 p->parent->nchild++; 388 389 if ( ! mdoc_valid_pre(mdoc, p)) 390 return(0); 391 if ( ! mdoc_action_pre(mdoc, p)) 392 return(0); 393 394 switch (p->type) { 395 case (MDOC_HEAD): 396 assert(MDOC_BLOCK == p->parent->type); 397 p->parent->head = p; 398 break; 399 case (MDOC_TAIL): 400 assert(MDOC_BLOCK == p->parent->type); 401 p->parent->tail = p; 402 break; 403 case (MDOC_BODY): 404 assert(MDOC_BLOCK == p->parent->type); 405 p->parent->body = p; 406 break; 407 default: 408 break; 409 } 410 411 mdoc->last = p; 412 413 switch (p->type) { 414 case (MDOC_TEXT): 415 if ( ! mdoc_valid_post(mdoc)) 416 return(0); 417 if ( ! mdoc_action_post(mdoc)) 418 return(0); 419 break; 420 default: 421 break; 422 } 423 424 return(1); 425} 426 427 428static struct mdoc_node * 429node_alloc(struct mdoc *m, int line, int pos, 430 enum mdoct tok, enum mdoc_type type) 431{ 432 struct mdoc_node *p; 433 434 p = mandoc_calloc(1, sizeof(struct mdoc_node)); 435 p->sec = m->lastsec; 436 p->line = line; 437 p->pos = pos; 438 p->tok = tok; 439 p->type = type; 440 441 return(p); 442} 443 444 445int 446mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 447{ 448 struct mdoc_node *p; 449 450 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 451 if ( ! node_append(m, p)) 452 return(0); 453 m->next = MDOC_NEXT_CHILD; 454 return(1); 455} 456 457 458int 459mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 460{ 461 struct mdoc_node *p; 462 463 assert(m->first); 464 assert(m->last); 465 466 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 467 if ( ! node_append(m, p)) 468 return(0); 469 m->next = MDOC_NEXT_CHILD; 470 return(1); 471} 472 473 474int 475mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 476{ 477 struct mdoc_node *p; 478 479 p = node_alloc(m, line, pos, tok, MDOC_BODY); 480 if ( ! node_append(m, p)) 481 return(0); 482 m->next = MDOC_NEXT_CHILD; 483 return(1); 484} 485 486 487int 488mdoc_block_alloc(struct mdoc *m, int line, int pos, 489 enum mdoct tok, struct mdoc_arg *args) 490{ 491 struct mdoc_node *p; 492 493 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 494 p->args = args; 495 if (p->args) 496 (args->refcnt)++; 497 if ( ! node_append(m, p)) 498 return(0); 499 m->next = MDOC_NEXT_CHILD; 500 return(1); 501} 502 503 504int 505mdoc_elem_alloc(struct mdoc *m, int line, int pos, 506 enum mdoct tok, struct mdoc_arg *args) 507{ 508 struct mdoc_node *p; 509 510 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 511 p->args = args; 512 if (p->args) 513 (args->refcnt)++; 514 if ( ! node_append(m, p)) 515 return(0); 516 m->next = MDOC_NEXT_CHILD; 517 return(1); 518} 519 520 521static int 522pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 523{ 524 struct mdoc_node *n; 525 size_t sv; 526 527 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 528 n->string = mandoc_malloc(len + 1); 529 sv = strlcpy(n->string, p, len + 1); 530 531 /* Prohibit truncation. */ 532 assert(sv < len + 1); 533 534 if ( ! node_append(m, n)) 535 return(0); 536 m->next = MDOC_NEXT_SIBLING; 537 return(1); 538} 539 540 541int 542mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 543{ 544 545 return(pstring(m, line, pos, p, strlen(p))); 546} 547 548 549void 550mdoc_node_free(struct mdoc_node *p) 551{ 552 553 if (p->parent) 554 p->parent->nchild--; 555 if (p->string) 556 free(p->string); 557 if (p->args) 558 mdoc_argv_free(p->args); 559 free(p); 560} 561 562 563void 564mdoc_node_freelist(struct mdoc_node *p) 565{ 566 567 if (p->child) 568 mdoc_node_freelist(p->child); 569 if (p->next) 570 mdoc_node_freelist(p->next); 571 572 assert(0 == p->nchild); 573 mdoc_node_free(p); 574} 575 576 577/* 578 * Parse free-form text, that is, a line that does not begin with the 579 * control character. 580 */ 581static int 582parsetext(struct mdoc *m, int line, char *buf) 583{ 584 int i, j; 585 char sv; 586 587 if (SEC_NONE == m->lastnamed) 588 return(mdoc_perr(m, line, 0, ETEXTPROL)); 589 590 /* 591 * If in literal mode, then pass the buffer directly to the 592 * back-end, as it should be preserved as a single term. 593 */ 594 595 if (MDOC_LITERAL & m->flags) 596 return(mdoc_word_alloc(m, line, 0, buf)); 597 598 /* Disallow blank/white-space lines in non-literal mode. */ 599 600 for (i = 0; ' ' == buf[i]; i++) 601 /* Skip leading whitespace. */ ; 602 603 if ('\0' == buf[i]) 604 return(mdoc_perr(m, line, 0, ENOBLANK)); 605 606 /* 607 * Break apart a free-form line into tokens. Spaces are 608 * stripped out of the input. 609 */ 610 611 for (j = i; buf[i]; i++) { 612 if (' ' != buf[i]) 613 continue; 614 615 /* Escaped whitespace. */ 616 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 617 continue; 618 619 sv = buf[i]; 620 buf[i++] = '\0'; 621 622 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 623 return(0); 624 625 /* Trailing whitespace? Check at overwritten byte. */ 626 627 if (' ' == sv && '\0' == buf[i]) 628 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) 629 return(0); 630 631 for ( ; ' ' == buf[i]; i++) 632 /* Skip trailing whitespace. */ ; 633 634 j = i; 635 636 /* Trailing whitespace? */ 637 638 if (' ' == buf[i - 1] && '\0' == buf[i]) 639 if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS)) 640 return(0); 641 642 if ('\0' == buf[i]) 643 break; 644 } 645 646 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 647 return(0); 648 649 m->next = MDOC_NEXT_SIBLING; 650 return(1); 651} 652 653 654 655static int 656macrowarn(struct mdoc *m, int ln, const char *buf) 657{ 658 if ( ! (MDOC_IGN_MACRO & m->pflags)) 659 return(mdoc_verr(m, ln, 0, 660 "unknown macro: %s%s", 661 buf, strlen(buf) > 3 ? "..." : "")); 662 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 663 buf, strlen(buf) > 3 ? "..." : "")); 664} 665 666 667/* 668 * Parse a macro line, that is, a line beginning with the control 669 * character. 670 */ 671int 672parsemacro(struct mdoc *m, int ln, char *buf) 673{ 674 int i, j, c; 675 char mac[5]; 676 677 /* Empty lines are ignored. */ 678 679 if ('\0' == buf[1]) 680 return(1); 681 682 i = 1; 683 684 /* Accept whitespace after the initial control char. */ 685 686 if (' ' == buf[i]) { 687 i++; 688 while (buf[i] && ' ' == buf[i]) 689 i++; 690 if ('\0' == buf[i]) 691 return(1); 692 } 693 694 /* Copy the first word into a nil-terminated buffer. */ 695 696 for (j = 0; j < 4; j++, i++) { 697 if ('\0' == (mac[j] = buf[i])) 698 break; 699 else if (' ' == buf[i]) 700 break; 701 702 /* Check for invalid characters. */ 703 704 if (isgraph((u_char)buf[i])) 705 continue; 706 return(mdoc_perr(m, ln, i, EPRINT)); 707 } 708 709 mac[j] = 0; 710 711 if (j == 4 || j < 2) { 712 if ( ! macrowarn(m, ln, mac)) 713 goto err; 714 return(1); 715 } 716 717 if (MDOC_MAX == (c = mdoc_hash_find(mac))) { 718 if ( ! macrowarn(m, ln, mac)) 719 goto err; 720 return(1); 721 } 722 723 /* The macro is sane. Jump to the next word. */ 724 725 while (buf[i] && ' ' == buf[i]) 726 i++; 727 728 /* Trailing whitespace? */ 729 730 if ('\0' == buf[i] && ' ' == buf[i - 1]) 731 if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS)) 732 goto err; 733 734 /* 735 * Begin recursive parse sequence. Since we're at the start of 736 * the line, we don't need to do callable/parseable checks. 737 */ 738 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 739 goto err; 740 741 return(1); 742 743err: /* Error out. */ 744 745 m->flags |= MDOC_HALT; 746 return(0); 747} 748 749 750