/* mdoc.c revision 1.1.1.2 */
1/* $Vendor-Id: mdoc.c,v 1.111 2009/10/26 07:11:07 kristaps Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <sys/types.h> 18 19#include <assert.h> 20#include <ctype.h> 21#include <stdarg.h> 22#include <stdio.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include "libmdoc.h" 27 28const char *const __mdoc_merrnames[MERRMAX] = { 29 "trailing whitespace", /* ETAILWS */ 30 "unexpected quoted parameter", /* EQUOTPARM */ 31 "unterminated quoted parameter", /* EQUOTTERM */ 32 "system: malloc error", /* EMALLOC */ 33 "argument parameter suggested", /* EARGVAL */ 34 "macro disallowed in prologue", /* EBODYPROL */ 35 "macro disallowed in body", /* EPROLBODY */ 36 "text disallowed in prologue", /* ETEXTPROL */ 37 "blank line disallowed", /* ENOBLANK */ 38 "text parameter too long", /* ETOOLONG */ 39 "invalid escape sequence", /* EESCAPE */ 40 "invalid character", /* EPRINT */ 41 "document has no body", /* ENODAT */ 42 "document has no prologue", /* ENOPROLOGUE */ 43 "expected line arguments", /* ELINE */ 44 "invalid AT&T argument", /* EATT */ 45 "default name not yet set", /* ENAME */ 46 "missing list type", /* ELISTTYPE */ 47 "missing display type", /* EDISPTYPE */ 48 "too many display 
types", /* EMULTIDISP */ 49 "too many list types", /* EMULTILIST */ 50 "NAME section must be first", /* ESECNAME */ 51 "badly-formed NAME section", /* ENAMESECINC */ 52 "argument repeated", /* EARGREP */ 53 "expected boolean parameter", /* EBOOL */ 54 "inconsistent column syntax", /* ECOLMIS */ 55 "nested display invalid", /* ENESTDISP */ 56 "width argument missing", /* EMISSWIDTH */ 57 "invalid section for this manual section", /* EWRONGMSEC */ 58 "section out of conventional order", /* ESECOOO */ 59 "section repeated", /* ESECREP */ 60 "invalid standard argument", /* EBADSTAND */ 61 "multi-line arguments discouraged", /* ENOMULTILINE */ 62 "multi-line arguments suggested", /* EMULTILINE */ 63 "line arguments discouraged", /* ENOLINE */ 64 "prologue macro out of conventional order", /* EPROLOOO */ 65 "prologue macro repeated", /* EPROLREP */ 66 "invalid manual section", /* EBADMSEC */ 67 "invalid section", /* EBADSEC */ 68 "invalid font mode", /* EFONT */ 69 "invalid date syntax", /* EBADDATE */ 70 "invalid number format", /* ENUMFMT */ 71 "superfluous width argument", /* ENOWIDTH */ 72 "system: utsname error", /* EUTSNAME */ 73 "obsolete macro", /* EOBS */ 74 "end-of-line scope violation", /* EIMPBRK */ 75 "empty macro ignored", /* EIGNE */ 76 "unclosed explicit scope", /* EOPEN */ 77 "unterminated quoted phrase", /* EQUOTPHR */ 78 "closure macro without prior context", /* ENOCTX */ 79 "no description found for library", /* ELIB */ 80 "bad child for parent context", /* EBADCHILD */ 81 "list arguments preceding type", /* ENOTYPE */ 82}; 83 84const char *const __mdoc_macronames[MDOC_MAX] = { 85 "Ap", "Dd", "Dt", "Os", 86 "Sh", "Ss", "Pp", "D1", 87 "Dl", "Bd", "Ed", "Bl", 88 "El", "It", "Ad", "An", 89 "Ar", "Cd", "Cm", "Dv", 90 "Er", "Ev", "Ex", "Fa", 91 "Fd", "Fl", "Fn", "Ft", 92 "Ic", "In", "Li", "Nd", 93 "Nm", "Op", "Ot", "Pa", 94 "Rv", "St", "Va", "Vt", 95 /* LINTED */ 96 "Xr", "\%A", "\%B", "\%D", 97 /* LINTED */ 98 "\%I", "\%J", "\%N", "\%O", 99 /* LINTED */ 
100 "\%P", "\%R", "\%T", "\%V", 101 "Ac", "Ao", "Aq", "At", 102 "Bc", "Bf", "Bo", "Bq", 103 "Bsx", "Bx", "Db", "Dc", 104 "Do", "Dq", "Ec", "Ef", 105 "Em", "Eo", "Fx", "Ms", 106 "No", "Ns", "Nx", "Ox", 107 "Pc", "Pf", "Po", "Pq", 108 "Qc", "Ql", "Qo", "Qq", 109 "Re", "Rs", "Sc", "So", 110 "Sq", "Sm", "Sx", "Sy", 111 "Tn", "Ux", "Xc", "Xo", 112 "Fo", "Fc", "Oo", "Oc", 113 "Bk", "Ek", "Bt", "Hf", 114 "Fr", "Ud", "Lb", "Lp", 115 "Lk", "Mt", "Brq", "Bro", 116 /* LINTED */ 117 "Brc", "\%C", "Es", "En", 118 /* LINTED */ 119 "Dx", "\%Q", "br", "sp", 120 /* LINTED */ 121 "\%U" 122 }; 123 124const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 125 "split", "nosplit", "ragged", 126 "unfilled", "literal", "file", 127 "offset", "bullet", "dash", 128 "hyphen", "item", "enum", 129 "tag", "diag", "hang", 130 "ohang", "inset", "column", 131 "width", "compact", "std", 132 "filled", "words", "emphasis", 133 "symbolic", "nested", "centered" 134 }; 135 136const char * const *mdoc_macronames = __mdoc_macronames; 137const char * const *mdoc_argnames = __mdoc_argnames; 138 139static void mdoc_free1(struct mdoc *); 140static int mdoc_alloc1(struct mdoc *); 141static struct mdoc_node *node_alloc(struct mdoc *, int, int, 142 int, enum mdoc_type); 143static int node_append(struct mdoc *, 144 struct mdoc_node *); 145static int parsetext(struct mdoc *, int, char *); 146static int parsemacro(struct mdoc *, int, char *); 147static int macrowarn(struct mdoc *, int, const char *); 148static int pstring(struct mdoc *, int, int, 149 const char *, size_t); 150 151#ifdef __linux__ 152extern size_t strlcpy(char *, const char *, size_t); 153#endif 154 155 156const struct mdoc_node * 157mdoc_node(const struct mdoc *m) 158{ 159 160 return(MDOC_HALT & m->flags ? NULL : m->first); 161} 162 163 164const struct mdoc_meta * 165mdoc_meta(const struct mdoc *m) 166{ 167 168 return(MDOC_HALT & m->flags ? NULL : &m->meta); 169} 170 171 172/* 173 * Frees volatile resources (parse tree, meta-data, fields). 
174 */ 175static void 176mdoc_free1(struct mdoc *mdoc) 177{ 178 179 if (mdoc->first) 180 mdoc_node_freelist(mdoc->first); 181 if (mdoc->meta.title) 182 free(mdoc->meta.title); 183 if (mdoc->meta.os) 184 free(mdoc->meta.os); 185 if (mdoc->meta.name) 186 free(mdoc->meta.name); 187 if (mdoc->meta.arch) 188 free(mdoc->meta.arch); 189 if (mdoc->meta.vol) 190 free(mdoc->meta.vol); 191} 192 193 194/* 195 * Allocate all volatile resources (parse tree, meta-data, fields). 196 */ 197static int 198mdoc_alloc1(struct mdoc *mdoc) 199{ 200 201 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 202 mdoc->flags = 0; 203 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 204 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 205 if (NULL == mdoc->last) 206 return(0); 207 208 mdoc->first = mdoc->last; 209 mdoc->last->type = MDOC_ROOT; 210 mdoc->next = MDOC_NEXT_CHILD; 211 return(1); 212} 213 214 215/* 216 * Free up volatile resources (see mdoc_free1()) then re-initialises the 217 * data with mdoc_alloc1(). After invocation, parse data has been reset 218 * and the parser is ready for re-invocation on a new tree; however, 219 * cross-parse non-volatile data is kept intact. 220 */ 221int 222mdoc_reset(struct mdoc *mdoc) 223{ 224 225 mdoc_free1(mdoc); 226 return(mdoc_alloc1(mdoc)); 227} 228 229 230/* 231 * Completely free up all volatile and non-volatile parse resources. 232 * After invocation, the pointer is no longer usable. 233 */ 234void 235mdoc_free(struct mdoc *mdoc) 236{ 237 238 mdoc_free1(mdoc); 239 free(mdoc); 240} 241 242 243/* 244 * Allocate volatile and non-volatile parse resources. 
245 */ 246struct mdoc * 247mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 248{ 249 struct mdoc *p; 250 251 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 252 return(NULL); 253 if (cb) 254 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 255 256 mdoc_hash_init(); 257 258 p->data = data; 259 p->pflags = pflags; 260 261 if (mdoc_alloc1(p)) 262 return(p); 263 264 free(p); 265 return(NULL); 266} 267 268 269/* 270 * Climb back up the parse tree, validating open scopes. Mostly calls 271 * through to macro_end() in macro.c. 272 */ 273int 274mdoc_endparse(struct mdoc *m) 275{ 276 277 if (MDOC_HALT & m->flags) 278 return(0); 279 else if (mdoc_macroend(m)) 280 return(1); 281 m->flags |= MDOC_HALT; 282 return(0); 283} 284 285 286/* 287 * Main parse routine. Parses a single line -- really just hands off to 288 * the macro (parsemacro()) or text parser (parsetext()). 289 */ 290int 291mdoc_parseln(struct mdoc *m, int ln, char *buf) 292{ 293 294 if (MDOC_HALT & m->flags) 295 return(0); 296 297 return('.' == *buf ? parsemacro(m, ln, buf) : 298 parsetext(m, ln, buf)); 299} 300 301 302int 303mdoc_verr(struct mdoc *mdoc, int ln, int pos, 304 const char *fmt, ...) 305{ 306 char buf[256]; 307 va_list ap; 308 309 if (NULL == mdoc->cb.mdoc_err) 310 return(0); 311 312 va_start(ap, fmt); 313 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 314 va_end(ap); 315 316 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 317} 318 319 320int 321mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 
322{ 323 char buf[256]; 324 va_list ap; 325 326 if (NULL == mdoc->cb.mdoc_warn) 327 return(0); 328 329 va_start(ap, fmt); 330 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 331 va_end(ap); 332 333 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 334} 335 336 337int 338mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 339{ 340 const char *p; 341 342 p = __mdoc_merrnames[(int)type]; 343 assert(p); 344 345 if (iserr) 346 return(mdoc_verr(m, line, pos, p)); 347 348 return(mdoc_vwarn(m, line, pos, p)); 349} 350 351 352int 353mdoc_macro(struct mdoc *m, int tok, 354 int ln, int pp, int *pos, char *buf) 355{ 356 /* 357 * If we're in the prologue, deny "body" macros. Similarly, if 358 * we're in the body, deny prologue calls. 359 */ 360 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 361 MDOC_PBODY & m->flags) 362 return(mdoc_perr(m, ln, pp, EPROLBODY)); 363 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 364 ! (MDOC_PBODY & m->flags)) 365 return(mdoc_perr(m, ln, pp, EBODYPROL)); 366 367 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 368} 369 370 371static int 372node_append(struct mdoc *mdoc, struct mdoc_node *p) 373{ 374 375 assert(mdoc->last); 376 assert(mdoc->first); 377 assert(MDOC_ROOT != p->type); 378 379 switch (mdoc->next) { 380 case (MDOC_NEXT_SIBLING): 381 mdoc->last->next = p; 382 p->prev = mdoc->last; 383 p->parent = mdoc->last->parent; 384 break; 385 case (MDOC_NEXT_CHILD): 386 mdoc->last->child = p; 387 p->parent = mdoc->last; 388 break; 389 default: 390 abort(); 391 /* NOTREACHED */ 392 } 393 394 p->parent->nchild++; 395 396 if ( ! mdoc_valid_pre(mdoc, p)) 397 return(0); 398 if ( ! 
mdoc_action_pre(mdoc, p)) 399 return(0); 400 401 switch (p->type) { 402 case (MDOC_HEAD): 403 assert(MDOC_BLOCK == p->parent->type); 404 p->parent->head = p; 405 break; 406 case (MDOC_TAIL): 407 assert(MDOC_BLOCK == p->parent->type); 408 p->parent->tail = p; 409 break; 410 case (MDOC_BODY): 411 assert(MDOC_BLOCK == p->parent->type); 412 p->parent->body = p; 413 break; 414 default: 415 break; 416 } 417 418 mdoc->last = p; 419 420 switch (p->type) { 421 case (MDOC_TEXT): 422 if ( ! mdoc_valid_post(mdoc)) 423 return(0); 424 if ( ! mdoc_action_post(mdoc)) 425 return(0); 426 break; 427 default: 428 break; 429 } 430 431 return(1); 432} 433 434 435static struct mdoc_node * 436node_alloc(struct mdoc *m, int line, 437 int pos, int tok, enum mdoc_type type) 438{ 439 struct mdoc_node *p; 440 441 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 442 (void)mdoc_nerr(m, m->last, EMALLOC); 443 return(NULL); 444 } 445 446 p->sec = m->lastsec; 447 p->line = line; 448 p->pos = pos; 449 p->tok = tok; 450 if (MDOC_TEXT != (p->type = type)) 451 assert(p->tok >= 0); 452 453 return(p); 454} 455 456 457int 458mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 459{ 460 struct mdoc_node *p; 461 462 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 463 if (NULL == p) 464 return(0); 465 if ( ! node_append(m, p)) 466 return(0); 467 m->next = MDOC_NEXT_CHILD; 468 return(1); 469} 470 471 472int 473mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 474{ 475 struct mdoc_node *p; 476 477 assert(m->first); 478 assert(m->last); 479 480 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 481 if (NULL == p) 482 return(0); 483 if ( ! node_append(m, p)) 484 return(0); 485 m->next = MDOC_NEXT_CHILD; 486 return(1); 487} 488 489 490int 491mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 492{ 493 struct mdoc_node *p; 494 495 p = node_alloc(m, line, pos, tok, MDOC_BODY); 496 if (NULL == p) 497 return(0); 498 if ( ! 
node_append(m, p)) 499 return(0); 500 m->next = MDOC_NEXT_CHILD; 501 return(1); 502} 503 504 505int 506mdoc_block_alloc(struct mdoc *m, int line, int pos, 507 int tok, struct mdoc_arg *args) 508{ 509 struct mdoc_node *p; 510 511 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 512 if (NULL == p) 513 return(0); 514 p->args = args; 515 if (p->args) 516 (args->refcnt)++; 517 if ( ! node_append(m, p)) 518 return(0); 519 m->next = MDOC_NEXT_CHILD; 520 return(1); 521} 522 523 524int 525mdoc_elem_alloc(struct mdoc *m, int line, int pos, 526 int tok, struct mdoc_arg *args) 527{ 528 struct mdoc_node *p; 529 530 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 531 if (NULL == p) 532 return(0); 533 p->args = args; 534 if (p->args) 535 (args->refcnt)++; 536 if ( ! node_append(m, p)) 537 return(0); 538 m->next = MDOC_NEXT_CHILD; 539 return(1); 540} 541 542 543static int 544pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 545{ 546 struct mdoc_node *n; 547 size_t sv; 548 549 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 550 if (NULL == n) 551 return(mdoc_nerr(m, m->last, EMALLOC)); 552 553 n->string = malloc(len + 1); 554 if (NULL == n->string) { 555 free(n); 556 return(mdoc_nerr(m, m->last, EMALLOC)); 557 } 558 559 sv = strlcpy(n->string, p, len + 1); 560 561 /* Prohibit truncation. */ 562 assert(sv < len + 1); 563 564 if ( ! 
node_append(m, n)) 565 return(0); 566 m->next = MDOC_NEXT_SIBLING; 567 return(1); 568} 569 570 571int 572mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 573{ 574 575 return(pstring(m, line, pos, p, strlen(p))); 576} 577 578 579void 580mdoc_node_free(struct mdoc_node *p) 581{ 582 583 if (p->parent) 584 p->parent->nchild--; 585 if (p->string) 586 free(p->string); 587 if (p->args) 588 mdoc_argv_free(p->args); 589 free(p); 590} 591 592 593void 594mdoc_node_freelist(struct mdoc_node *p) 595{ 596 597 if (p->child) 598 mdoc_node_freelist(p->child); 599 if (p->next) 600 mdoc_node_freelist(p->next); 601 602 assert(0 == p->nchild); 603 mdoc_node_free(p); 604} 605 606 607/* 608 * Parse free-form text, that is, a line that does not begin with the 609 * control character. 610 */ 611static int 612parsetext(struct mdoc *m, int line, char *buf) 613{ 614 int i, j; 615 616 if (SEC_NONE == m->lastnamed) 617 return(mdoc_perr(m, line, 0, ETEXTPROL)); 618 619 /* 620 * If in literal mode, then pass the buffer directly to the 621 * back-end, as it should be preserved as a single term. 622 */ 623 624 if (MDOC_LITERAL & m->flags) 625 return(mdoc_word_alloc(m, line, 0, buf)); 626 627 /* Disallow blank/white-space lines in non-literal mode. */ 628 629 for (i = 0; ' ' == buf[i]; i++) 630 /* Skip leading whitespace. */ ; 631 if (0 == buf[i]) 632 return(mdoc_perr(m, line, 0, ENOBLANK)); 633 634 /* 635 * Break apart a free-form line into tokens. Spaces are 636 * stripped out of the input. 637 */ 638 639 for (j = i; buf[i]; i++) { 640 if (' ' != buf[i]) 641 continue; 642 643 /* Escaped whitespace. */ 644 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 645 continue; 646 647 buf[i++] = 0; 648 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 649 return(0); 650 651 for ( ; ' ' == buf[i]; i++) 652 /* Skip trailing whitespace. */ ; 653 654 j = i; 655 if (0 == buf[i]) 656 break; 657 } 658 659 if (j != i && ! 
pstring(m, line, j, &buf[j], (size_t)(i - j))) 660 return(0); 661 662 m->next = MDOC_NEXT_SIBLING; 663 return(1); 664} 665 666 667 668static int 669macrowarn(struct mdoc *m, int ln, const char *buf) 670{ 671 if ( ! (MDOC_IGN_MACRO & m->pflags)) 672 return(mdoc_verr(m, ln, 0, 673 "unknown macro: %s%s", 674 buf, strlen(buf) > 3 ? "..." : "")); 675 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 676 buf, strlen(buf) > 3 ? "..." : "")); 677} 678 679 680/* 681 * Parse a macro line, that is, a line beginning with the control 682 * character. 683 */ 684int 685parsemacro(struct mdoc *m, int ln, char *buf) 686{ 687 int i, j, c; 688 char mac[5]; 689 690 /* Empty lines are ignored. */ 691 692 if (0 == buf[1]) 693 return(1); 694 695 i = 1; 696 697 /* Accept whitespace after the initial control char. */ 698 699 if (' ' == buf[i]) { 700 i++; 701 while (buf[i] && ' ' == buf[i]) 702 i++; 703 if (0 == buf[i]) 704 return(1); 705 } 706 707 /* Copy the first word into a nil-terminated buffer. */ 708 709 for (j = 0; j < 4; j++, i++) { 710 if (0 == (mac[j] = buf[i])) 711 break; 712 else if (' ' == buf[i]) 713 break; 714 715 /* Check for invalid characters. */ 716 717 if (isgraph((u_char)buf[i])) 718 continue; 719 return(mdoc_perr(m, ln, i, EPRINT)); 720 } 721 722 mac[j] = 0; 723 724 if (j == 4 || j < 2) { 725 if ( ! macrowarn(m, ln, mac)) 726 goto err; 727 return(1); 728 } 729 730 if (MDOC_MAX == (c = mdoc_hash_find(mac))) { 731 if ( ! macrowarn(m, ln, mac)) 732 goto err; 733 return(1); 734 } 735 736 /* The macro is sane. Jump to the next word. */ 737 738 while (buf[i] && ' ' == buf[i]) 739 i++; 740 741 /* 742 * Begin recursive parse sequence. Since we're at the start of 743 * the line, we don't need to do callable/parseable checks. 744 */ 745 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 746 goto err; 747 748 return(1); 749 750err: /* Error out. */ 751 752 m->flags |= MDOC_HALT; 753 return(0); 754} 755 756 757