mdoc.c revision 1.23
1/* $Id: mdoc.c,v 1.23 2009/08/22 17:21:24 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <assert.h> 18#include <ctype.h> 19#include <stdarg.h> 20#include <stdio.h> 21#include <stdlib.h> 22#include <string.h> 23 24#include "libmdoc.h" 25 26const char *const __mdoc_merrnames[MERRMAX] = { 27 "trailing whitespace", /* ETAILWS */ 28 "unexpected quoted parameter", /* EQUOTPARM */ 29 "unterminated quoted parameter", /* EQUOTTERM */ 30 "system: malloc error", /* EMALLOC */ 31 "argument parameter suggested", /* EARGVAL */ 32 "macro not callable", /* ENOCALL */ 33 "macro disallowed in prologue", /* EBODYPROL */ 34 "macro disallowed in body", /* EPROLBODY */ 35 "text disallowed in prologue", /* ETEXTPROL */ 36 "blank line disallowed", /* ENOBLANK */ 37 "text parameter too long", /* ETOOLONG */ 38 "invalid escape sequence", /* EESCAPE */ 39 "invalid character", /* EPRINT */ 40 "document has no body", /* ENODAT */ 41 "document has no prologue", /* ENOPROLOGUE */ 42 "expected line arguments", /* ELINE */ 43 "invalid AT&T argument", /* EATT */ 44 "default name not yet set", /* ENAME */ 45 "missing list type", /* ELISTTYPE */ 46 "missing display type", /* EDISPTYPE */ 47 "too many display types", /* EMULTIDISP */ 48 "too many list types", /* EMULTILIST */ 49 "NAME section must be first", /* ESECNAME */ 50 "badly-formed NAME section", /* ENAMESECINC */ 51 "argument repeated", /* EARGREP */ 52 "expected boolean parameter", /* EBOOL */ 53 "inconsistent column syntax", /* ECOLMIS */ 54 "nested display invalid", /* ENESTDISP */ 55 "width argument missing", /* EMISSWIDTH */ 56 "invalid section for this manual section", /* EWRONGMSEC */ 57 "section out of conventional order", /* ESECOOO */ 58 "section repeated", /* ESECREP */ 59 "invalid standard argument", /* EBADSTAND */ 60 "multi-line arguments discouraged", /* ENOMULTILINE */ 61 "multi-line arguments suggested", /* EMULTILINE */ 62 "line arguments discouraged", /* ENOLINE */ 63 "prologue macro out of conventional order", /* EPROLOOO */ 64 "prologue macro repeated", /* EPROLREP */ 65 "invalid manual section", /* EBADMSEC */ 66 "invalid section", /* EBADSEC */ 67 "invalid font mode", /* EFONT */ 68 "invalid date syntax", /* EBADDATE */ 69 "invalid number format", /* ENUMFMT */ 70 "superfluous width argument", /* ENOWIDTH */ 71 "system: utsname error", /* EUTSNAME */ 72 "obsolete macro", /* EOBS */ 73 "macro-like parameter", /* EMACPARM */ 74 "end-of-line scope violation", /* EIMPBRK */ 75 "empty macro ignored", /* EIGNE */ 76 "unclosed explicit scope", /* EOPEN */ 77 "unterminated quoted phrase", /* EQUOTPHR */ 78 "closure macro without prior context", /* ENOCTX */ 79 "invalid whitespace after control character", /* ESPACE */ 80 "no description found for library" /* ELIB */ 81}; 82 83const char *const __mdoc_macronames[MDOC_MAX] = { 84 "Ap", "Dd", "Dt", "Os", 85 "Sh", "Ss", "Pp", "D1", 86 "Dl", "Bd", "Ed", "Bl", 87 "El", "It", "Ad", "An", 88 "Ar", "Cd", "Cm", "Dv", 89 "Er", "Ev", "Ex", "Fa", 90 "Fd", "Fl", "Fn", "Ft", 91 "Ic", "In", "Li", "Nd", 92 "Nm", "Op", "Ot", "Pa", 93 "Rv", "St", "Va", "Vt", 94 /* LINTED */ 95 "Xr", "\%A", "\%B", "\%D", 96 /* LINTED */ 97 "\%I", "\%J", "\%N", "\%O", 98 /* LINTED */ 99 "\%P", "\%R", "\%T", "\%V", 100 "Ac", "Ao", "Aq", "At", 101 "Bc", "Bf", "Bo", "Bq", 102 "Bsx", "Bx", "Db", "Dc", 103 "Do", "Dq", "Ec", "Ef", 104 "Em", "Eo", "Fx", "Ms", 105 "No", "Ns", "Nx", "Ox", 106 "Pc", "Pf", "Po", "Pq", 107 "Qc", "Ql", "Qo", "Qq", 108 "Re", "Rs", "Sc", "So", 109 "Sq", "Sm", "Sx", "Sy", 110 "Tn", "Ux", "Xc", "Xo", 111 "Fo", "Fc", "Oo", "Oc", 112 "Bk", "Ek", "Bt", "Hf", 113 "Fr", "Ud", "Lb", "Lp", 114 "Lk", "Mt", "Brq", "Bro", 115 /* LINTED */ 116 "Brc", "\%C", "Es", "En", 117 /* LINTED */ 118 "Dx", "\%Q", "br", "sp" 119 }; 120 121const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 122 "split", "nosplit", "ragged", 123 "unfilled", "literal", "file", 124 "offset", "bullet", "dash", 125 "hyphen", "item", "enum", 126 "tag", "diag", "hang", 127 "ohang", "inset", "column", 128 "width", "compact", "std", 129 "filled", "words", "emphasis", 130 "symbolic", "nested" 131 }; 132 133const char * const *mdoc_macronames = __mdoc_macronames; 134const char * const *mdoc_argnames = __mdoc_argnames; 135 136static void mdoc_free1(struct mdoc *); 137static int mdoc_alloc1(struct mdoc *); 138static struct mdoc_node *node_alloc(struct mdoc *, int, int, 139 int, enum mdoc_type); 140static int node_append(struct mdoc *, 141 struct mdoc_node *); 142static int parsetext(struct mdoc *, int, char *); 143static int parsemacro(struct mdoc *, int, char *); 144static int macrowarn(struct mdoc *, int, const char *); 145static int pstring(struct mdoc *, int, int, 146 const char *, size_t); 147 148 149const struct mdoc_node * 150mdoc_node(const struct mdoc *m) 151{ 152 153 return(MDOC_HALT & m->flags ? NULL : m->first); 154} 155 156 157const struct mdoc_meta * 158mdoc_meta(const struct mdoc *m) 159{ 160 161 return(MDOC_HALT & m->flags ? NULL : &m->meta); 162} 163 164 165/* 166 * Frees volatile resources (parse tree, meta-data, fields). 167 */ 168static void 169mdoc_free1(struct mdoc *mdoc) 170{ 171 172 if (mdoc->first) 173 mdoc_node_freelist(mdoc->first); 174 if (mdoc->meta.title) 175 free(mdoc->meta.title); 176 if (mdoc->meta.os) 177 free(mdoc->meta.os); 178 if (mdoc->meta.name) 179 free(mdoc->meta.name); 180 if (mdoc->meta.arch) 181 free(mdoc->meta.arch); 182 if (mdoc->meta.vol) 183 free(mdoc->meta.vol); 184} 185 186 187/* 188 * Allocate all volatile resources (parse tree, meta-data, fields). 189 */ 190static int 191mdoc_alloc1(struct mdoc *mdoc) 192{ 193 194 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 195 mdoc->flags = 0; 196 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 197 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 198 if (NULL == mdoc->last) 199 return(0); 200 201 mdoc->first = mdoc->last; 202 mdoc->last->type = MDOC_ROOT; 203 mdoc->next = MDOC_NEXT_CHILD; 204 return(1); 205} 206 207 208/* 209 * Free up volatile resources (see mdoc_free1()) then re-initialises the 210 * data with mdoc_alloc1(). After invocation, parse data has been reset 211 * and the parser is ready for re-invocation on a new tree; however, 212 * cross-parse non-volatile data is kept intact. 213 */ 214int 215mdoc_reset(struct mdoc *mdoc) 216{ 217 218 mdoc_free1(mdoc); 219 return(mdoc_alloc1(mdoc)); 220} 221 222 223/* 224 * Completely free up all volatile and non-volatile parse resources. 225 * After invocation, the pointer is no longer usable. 226 */ 227void 228mdoc_free(struct mdoc *mdoc) 229{ 230 231 mdoc_free1(mdoc); 232 if (mdoc->htab) 233 mdoc_hash_free(mdoc->htab); 234 free(mdoc); 235} 236 237 238/* 239 * Allocate volatile and non-volatile parse resources. 240 */ 241struct mdoc * 242mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 243{ 244 struct mdoc *p; 245 246 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 247 return(NULL); 248 if (cb) 249 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 250 251 p->data = data; 252 p->pflags = pflags; 253 254 if (NULL == (p->htab = mdoc_hash_alloc())) { 255 free(p); 256 return(NULL); 257 } else if (mdoc_alloc1(p)) 258 return(p); 259 260 free(p); 261 return(NULL); 262} 263 264 265/* 266 * Climb back up the parse tree, validating open scopes. Mostly calls 267 * through to macro_end() in macro.c. 268 */ 269int 270mdoc_endparse(struct mdoc *m) 271{ 272 273 if (MDOC_HALT & m->flags) 274 return(0); 275 else if (mdoc_macroend(m)) 276 return(1); 277 m->flags |= MDOC_HALT; 278 return(0); 279} 280 281 282/* 283 * Main parse routine. Parses a single line -- really just hands off to 284 * the macro (parsemacro()) or text parser (parsetext()). 285 */ 286int 287mdoc_parseln(struct mdoc *m, int ln, char *buf) 288{ 289 290 if (MDOC_HALT & m->flags) 291 return(0); 292 293 return('.' == *buf ? parsemacro(m, ln, buf) : 294 parsetext(m, ln, buf)); 295} 296 297 298int 299mdoc_verr(struct mdoc *mdoc, int ln, int pos, 300 const char *fmt, ...) 301{ 302 char buf[256]; 303 va_list ap; 304 305 if (NULL == mdoc->cb.mdoc_err) 306 return(0); 307 308 va_start(ap, fmt); 309 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 310 va_end(ap); 311 312 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 313} 314 315 316int 317mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 318{ 319 char buf[256]; 320 va_list ap; 321 322 if (NULL == mdoc->cb.mdoc_warn) 323 return(0); 324 325 va_start(ap, fmt); 326 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 327 va_end(ap); 328 329 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 330} 331 332 333int 334mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 335{ 336 const char *p; 337 338 p = __mdoc_merrnames[(int)type]; 339 assert(p); 340 341 if (iserr) 342 return(mdoc_verr(m, line, pos, p)); 343 344 return(mdoc_vwarn(m, line, pos, p)); 345} 346 347 348int 349mdoc_macro(struct mdoc *m, int tok, 350 int ln, int pp, int *pos, char *buf) 351{ 352 353 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 354 MDOC_PBODY & m->flags) 355 return(mdoc_perr(m, ln, pp, EPROLBODY)); 356 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 357 ! (MDOC_PBODY & m->flags)) 358 return(mdoc_perr(m, ln, pp, EBODYPROL)); 359 360 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 361 return(mdoc_perr(m, ln, pp, ENOCALL)); 362 363 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 364} 365 366 367static int 368node_append(struct mdoc *mdoc, struct mdoc_node *p) 369{ 370 371 assert(mdoc->last); 372 assert(mdoc->first); 373 assert(MDOC_ROOT != p->type); 374 375 switch (mdoc->next) { 376 case (MDOC_NEXT_SIBLING): 377 mdoc->last->next = p; 378 p->prev = mdoc->last; 379 p->parent = mdoc->last->parent; 380 break; 381 case (MDOC_NEXT_CHILD): 382 mdoc->last->child = p; 383 p->parent = mdoc->last; 384 break; 385 default: 386 abort(); 387 /* NOTREACHED */ 388 } 389 390 p->parent->nchild++; 391 392 if ( ! mdoc_valid_pre(mdoc, p)) 393 return(0); 394 if ( ! mdoc_action_pre(mdoc, p)) 395 return(0); 396 397 switch (p->type) { 398 case (MDOC_HEAD): 399 assert(MDOC_BLOCK == p->parent->type); 400 p->parent->head = p; 401 break; 402 case (MDOC_TAIL): 403 assert(MDOC_BLOCK == p->parent->type); 404 p->parent->tail = p; 405 break; 406 case (MDOC_BODY): 407 assert(MDOC_BLOCK == p->parent->type); 408 p->parent->body = p; 409 break; 410 default: 411 break; 412 } 413 414 mdoc->last = p; 415 416 switch (p->type) { 417 case (MDOC_TEXT): 418 if ( ! mdoc_valid_post(mdoc)) 419 return(0); 420 if ( ! mdoc_action_post(mdoc)) 421 return(0); 422 break; 423 default: 424 break; 425 } 426 427 return(1); 428} 429 430 431static struct mdoc_node * 432node_alloc(struct mdoc *m, int line, 433 int pos, int tok, enum mdoc_type type) 434{ 435 struct mdoc_node *p; 436 437 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 438 (void)mdoc_nerr(m, m->last, EMALLOC); 439 return(NULL); 440 } 441 442 p->sec = m->lastsec; 443 p->line = line; 444 p->pos = pos; 445 p->tok = tok; 446 if (MDOC_TEXT != (p->type = type)) 447 assert(p->tok >= 0); 448 449 return(p); 450} 451 452 453int 454mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 455{ 456 struct mdoc_node *p; 457 458 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 459 if (NULL == p) 460 return(0); 461 return(node_append(m, p)); 462} 463 464 465int 466mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 467{ 468 struct mdoc_node *p; 469 470 assert(m->first); 471 assert(m->last); 472 473 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 474 if (NULL == p) 475 return(0); 476 return(node_append(m, p)); 477} 478 479 480int 481mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 482{ 483 struct mdoc_node *p; 484 485 p = node_alloc(m, line, pos, tok, MDOC_BODY); 486 if (NULL == p) 487 return(0); 488 return(node_append(m, p)); 489} 490 491 492int 493mdoc_block_alloc(struct mdoc *m, int line, int pos, 494 int tok, struct mdoc_arg *args) 495{ 496 struct mdoc_node *p; 497 498 p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 499 if (NULL == p) 500 return(0); 501 p->args = args; 502 if (p->args) 503 (args->refcnt)++; 504 return(node_append(m, p)); 505} 506 507 508int 509mdoc_elem_alloc(struct mdoc *m, int line, int pos, 510 int tok, struct mdoc_arg *args) 511{ 512 struct mdoc_node *p; 513 514 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 515 if (NULL == p) 516 return(0); 517 p->args = args; 518 if (p->args) 519 (args->refcnt)++; 520 return(node_append(m, p)); 521} 522 523 524static int 525pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 526{ 527 struct mdoc_node *n; 528 size_t sv; 529 530 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 531 if (NULL == n) 532 return(mdoc_nerr(m, m->last, EMALLOC)); 533 534 n->string = malloc(len + 1); 535 if (NULL == n->string) { 536 free(n); 537 return(mdoc_nerr(m, m->last, EMALLOC)); 538 } 539 540 sv = strlcpy(n->string, p, len + 1); 541 542 /* Prohibit truncation. */ 543 assert(sv < len + 1); 544 545 return(node_append(m, n)); 546} 547 548 549int 550mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 551{ 552 553 return(pstring(m, line, pos, p, strlen(p))); 554} 555 556 557void 558mdoc_node_free(struct mdoc_node *p) 559{ 560 561 if (p->parent) 562 p->parent->nchild--; 563 if (p->string) 564 free(p->string); 565 if (p->args) 566 mdoc_argv_free(p->args); 567 free(p); 568} 569 570 571void 572mdoc_node_freelist(struct mdoc_node *p) 573{ 574 575 if (p->child) 576 mdoc_node_freelist(p->child); 577 if (p->next) 578 mdoc_node_freelist(p->next); 579 580 assert(0 == p->nchild); 581 mdoc_node_free(p); 582} 583 584 585/* 586 * Parse free-form text, that is, a line that does not begin with the 587 * control character. 588 */ 589static int 590parsetext(struct mdoc *m, int line, char *buf) 591{ 592 int i, j; 593 594 if (SEC_NONE == m->lastnamed) 595 return(mdoc_perr(m, line, 0, ETEXTPROL)); 596 597 /* 598 * If in literal mode, then pass the buffer directly to the 599 * back-end, as it should be preserved as a single term. 600 */ 601 602 if (MDOC_LITERAL & m->flags) { 603 if ( ! mdoc_word_alloc(m, line, 0, buf)) 604 return(0); 605 m->next = MDOC_NEXT_SIBLING; 606 return(1); 607 } 608 609 /* Disallow blank/white-space lines in non-literal mode. */ 610 611 for (i = 0; ' ' == buf[i]; i++) 612 /* Skip leading whitespace. */ ; 613 if (0 == buf[i]) 614 return(mdoc_perr(m, line, 0, ENOBLANK)); 615 616 /* 617 * Break apart a free-form line into tokens. Spaces are 618 * stripped out of the input. 619 */ 620 621 for (j = i; buf[i]; i++) { 622 if (' ' != buf[i]) 623 continue; 624 625 /* Escaped whitespace. */ 626 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 627 continue; 628 629 buf[i++] = 0; 630 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 631 return(0); 632 m->next = MDOC_NEXT_SIBLING; 633 634 for ( ; ' ' == buf[i]; i++) 635 /* Skip trailing whitespace. */ ; 636 637 j = i; 638 if (0 == buf[i]) 639 break; 640 } 641 642 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 643 return(0); 644 645 m->next = MDOC_NEXT_SIBLING; 646 return(1); 647} 648 649 650 651 652static int 653macrowarn(struct mdoc *m, int ln, const char *buf) 654{ 655 if ( ! (MDOC_IGN_MACRO & m->pflags)) 656 return(mdoc_verr(m, ln, 0, 657 "unknown macro: %s%s", 658 buf, strlen(buf) > 3 ? "..." : "")); 659 return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s", 660 buf, strlen(buf) > 3 ? "..." : "")); 661} 662 663 664/* 665 * Parse a macro line, that is, a line beginning with the control 666 * character. 667 */ 668int 669parsemacro(struct mdoc *m, int ln, char *buf) 670{ 671 int i, c; 672 char mac[5]; 673 674 /* Empty lines are ignored. */ 675 676 if (0 == buf[1]) 677 return(1); 678 679 if (' ' == buf[1]) { 680 i = 2; 681 while (buf[i] && ' ' == buf[i]) 682 i++; 683 if (0 == buf[i]) 684 return(1); 685 return(mdoc_perr(m, ln, 1, ESPACE)); 686 } 687 688 /* Copy the first word into a nil-terminated buffer. */ 689 690 for (i = 1; i < 5; i++) { 691 if (0 == (mac[i - 1] = buf[i])) 692 break; 693 else if (' ' == buf[i]) 694 break; 695 } 696 697 mac[i - 1] = 0; 698 699 if (i == 5 || i <= 2) { 700 if ( ! macrowarn(m, ln, mac)) 701 goto err; 702 return(1); 703 } 704 705 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 706 if ( ! macrowarn(m, ln, mac)) 707 goto err; 708 return(1); 709 } 710 711 /* The macro is sane. Jump to the next word. */ 712 713 while (buf[i] && ' ' == buf[i]) 714 i++; 715 716 /* Begin recursive parse sequence. */ 717 718 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 719 goto err; 720 721 return(1); 722 723err: /* Error out. */ 724 725 m->flags |= MDOC_HALT; 726 return(0); 727} 728