/* mdoc.c revision 1.21 */
1/* $Id: mdoc.c,v 1.21 2009/07/26 02:17:29 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <assert.h> 18#include <ctype.h> 19#include <stdarg.h> 20#include <stdio.h> 21#include <stdlib.h> 22#include <string.h> 23 24#include "libmdoc.h" 25 26const char *const __mdoc_merrnames[MERRMAX] = { 27 "trailing whitespace", /* ETAILWS */ 28 "empty last list column", /* ECOLEMPTY */ 29 "unexpected quoted parameter", /* EQUOTPARM */ 30 "unterminated quoted parameter", /* EQUOTTERM */ 31 "system: malloc error", /* EMALLOC */ 32 "argument parameter suggested", /* EARGVAL */ 33 "macro not callable", /* ENOCALL */ 34 "macro disallowed in prologue", /* EBODYPROL */ 35 "macro disallowed in body", /* EPROLBODY */ 36 "text disallowed in prologue", /* ETEXTPROL */ 37 "blank line disallowed", /* ENOBLANK */ 38 "text parameter too long", /* ETOOLONG */ 39 "invalid escape sequence", /* EESCAPE */ 40 "invalid character", /* EPRINT */ 41 "document has no body", /* ENODAT */ 42 "document has no prologue", /* ENOPROLOGUE */ 43 "expected line arguments", /* ELINE */ 44 "invalid AT&T argument", /* EATT */ 45 "default name not yet set", /* ENAME */ 46 "missing list type", /* ELISTTYPE */ 47 "missing display 
type", /* EDISPTYPE */ 48 "too many display types", /* EMULTIDISP */ 49 "too many list types", /* EMULTILIST */ 50 "NAME section must be first", /* ESECNAME */ 51 "badly-formed NAME section", /* ENAMESECINC */ 52 "argument repeated", /* EARGREP */ 53 "expected boolean parameter", /* EBOOL */ 54 "inconsistent column syntax", /* ECOLMIS */ 55 "nested display invalid", /* ENESTDISP */ 56 "width argument missing", /* EMISSWIDTH */ 57 "invalid section for this manual section", /* EWRONGMSEC */ 58 "section out of conventional order", /* ESECOOO */ 59 "section repeated", /* ESECREP */ 60 "invalid standard argument", /* EBADSTAND */ 61 "multi-line arguments discouraged", /* ENOMULTILINE */ 62 "multi-line arguments suggested", /* EMULTILINE */ 63 "line arguments discouraged", /* ENOLINE */ 64 "prologue macro out of conventional order", /* EPROLOOO */ 65 "prologue macro repeated", /* EPROLREP */ 66 "invalid manual section", /* EBADMSEC */ 67 "invalid section", /* EBADSEC */ 68 "invalid font mode", /* EFONT */ 69 "invalid date syntax", /* EBADDATE */ 70 "invalid number format", /* ENUMFMT */ 71 "superfluous width argument", /* ENOWIDTH */ 72 "system: utsname error", /* EUTSNAME */ 73 "obsolete macro", /* EOBS */ 74 "macro-like parameter", /* EMACPARM */ 75 "end-of-line scope violation", /* EIMPBRK */ 76 "empty macro ignored", /* EIGNE */ 77 "unclosed explicit scope", /* EOPEN */ 78 "unterminated quoted phrase", /* EQUOTPHR */ 79 "closure macro without prior context", /* ENOCTX */ 80 "invalid whitespace after control character", /* ESPACE */ 81 "no description found for library" /* ELIB */ 82}; 83 84const char *const __mdoc_macronames[MDOC_MAX] = { 85 "Ap", "Dd", "Dt", "Os", 86 "Sh", "Ss", "Pp", "D1", 87 "Dl", "Bd", "Ed", "Bl", 88 "El", "It", "Ad", "An", 89 "Ar", "Cd", "Cm", "Dv", 90 "Er", "Ev", "Ex", "Fa", 91 "Fd", "Fl", "Fn", "Ft", 92 "Ic", "In", "Li", "Nd", 93 "Nm", "Op", "Ot", "Pa", 94 "Rv", "St", "Va", "Vt", 95 /* LINTED */ 96 "Xr", "\%A", "\%B", "\%D", 97 /* LINTED */ 98 
"\%I", "\%J", "\%N", "\%O", 99 /* LINTED */ 100 "\%P", "\%R", "\%T", "\%V", 101 "Ac", "Ao", "Aq", "At", 102 "Bc", "Bf", "Bo", "Bq", 103 "Bsx", "Bx", "Db", "Dc", 104 "Do", "Dq", "Ec", "Ef", 105 "Em", "Eo", "Fx", "Ms", 106 "No", "Ns", "Nx", "Ox", 107 "Pc", "Pf", "Po", "Pq", 108 "Qc", "Ql", "Qo", "Qq", 109 "Re", "Rs", "Sc", "So", 110 "Sq", "Sm", "Sx", "Sy", 111 "Tn", "Ux", "Xc", "Xo", 112 "Fo", "Fc", "Oo", "Oc", 113 "Bk", "Ek", "Bt", "Hf", 114 "Fr", "Ud", "Lb", "Lp", 115 "Lk", "Mt", "Brq", "Bro", 116 /* LINTED */ 117 "Brc", "\%C", "Es", "En", 118 /* LINTED */ 119 "Dx", "\%Q", "br", "sp" 120 }; 121 122const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 123 "split", "nosplit", "ragged", 124 "unfilled", "literal", "file", 125 "offset", "bullet", "dash", 126 "hyphen", "item", "enum", 127 "tag", "diag", "hang", 128 "ohang", "inset", "column", 129 "width", "compact", "std", 130 "filled", "words", "emphasis", 131 "symbolic", "nested" 132 }; 133 134const char * const *mdoc_macronames = __mdoc_macronames; 135const char * const *mdoc_argnames = __mdoc_argnames; 136 137static void mdoc_free1(struct mdoc *); 138static int mdoc_alloc1(struct mdoc *); 139static struct mdoc_node *node_alloc(struct mdoc *, int, int, 140 int, enum mdoc_type); 141static int node_append(struct mdoc *, 142 struct mdoc_node *); 143static int parsetext(struct mdoc *, int, char *); 144static int parsemacro(struct mdoc *, int, char *); 145static int macrowarn(struct mdoc *, int, const char *); 146static int pstring(struct mdoc *, int, int, 147 const char *, size_t); 148 149 150const struct mdoc_node * 151mdoc_node(const struct mdoc *m) 152{ 153 154 return(MDOC_HALT & m->flags ? NULL : m->first); 155} 156 157 158const struct mdoc_meta * 159mdoc_meta(const struct mdoc *m) 160{ 161 162 return(MDOC_HALT & m->flags ? NULL : &m->meta); 163} 164 165 166/* 167 * Frees volatile resources (parse tree, meta-data, fields). 
168 */ 169static void 170mdoc_free1(struct mdoc *mdoc) 171{ 172 173 if (mdoc->first) 174 mdoc_node_freelist(mdoc->first); 175 if (mdoc->meta.title) 176 free(mdoc->meta.title); 177 if (mdoc->meta.os) 178 free(mdoc->meta.os); 179 if (mdoc->meta.name) 180 free(mdoc->meta.name); 181 if (mdoc->meta.arch) 182 free(mdoc->meta.arch); 183 if (mdoc->meta.vol) 184 free(mdoc->meta.vol); 185} 186 187 188/* 189 * Allocate all volatile resources (parse tree, meta-data, fields). 190 */ 191static int 192mdoc_alloc1(struct mdoc *mdoc) 193{ 194 195 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 196 mdoc->flags = 0; 197 mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 198 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 199 if (NULL == mdoc->last) 200 return(0); 201 202 mdoc->first = mdoc->last; 203 mdoc->last->type = MDOC_ROOT; 204 mdoc->next = MDOC_NEXT_CHILD; 205 return(1); 206} 207 208 209/* 210 * Free up volatile resources (see mdoc_free1()) then re-initialises the 211 * data with mdoc_alloc1(). After invocation, parse data has been reset 212 * and the parser is ready for re-invocation on a new tree; however, 213 * cross-parse non-volatile data is kept intact. 214 */ 215int 216mdoc_reset(struct mdoc *mdoc) 217{ 218 219 mdoc_free1(mdoc); 220 return(mdoc_alloc1(mdoc)); 221} 222 223 224/* 225 * Completely free up all volatile and non-volatile parse resources. 226 * After invocation, the pointer is no longer usable. 227 */ 228void 229mdoc_free(struct mdoc *mdoc) 230{ 231 232 mdoc_free1(mdoc); 233 if (mdoc->htab) 234 mdoc_hash_free(mdoc->htab); 235 free(mdoc); 236} 237 238 239/* 240 * Allocate volatile and non-volatile parse resources. 
241 */ 242struct mdoc * 243mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 244{ 245 struct mdoc *p; 246 247 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 248 return(NULL); 249 if (cb) 250 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 251 252 p->data = data; 253 p->pflags = pflags; 254 255 if (NULL == (p->htab = mdoc_hash_alloc())) { 256 free(p); 257 return(NULL); 258 } else if (mdoc_alloc1(p)) 259 return(p); 260 261 free(p); 262 return(NULL); 263} 264 265 266/* 267 * Climb back up the parse tree, validating open scopes. Mostly calls 268 * through to macro_end() in macro.c. 269 */ 270int 271mdoc_endparse(struct mdoc *m) 272{ 273 274 if (MDOC_HALT & m->flags) 275 return(0); 276 else if (mdoc_macroend(m)) 277 return(1); 278 m->flags |= MDOC_HALT; 279 return(0); 280} 281 282 283/* 284 * Main parse routine. Parses a single line -- really just hands off to 285 * the macro (parsemacro()) or text parser (parsetext()). 286 */ 287int 288mdoc_parseln(struct mdoc *m, int ln, char *buf) 289{ 290 291 if (MDOC_HALT & m->flags) 292 return(0); 293 294 return('.' == *buf ? parsemacro(m, ln, buf) : 295 parsetext(m, ln, buf)); 296} 297 298 299int 300mdoc_verr(struct mdoc *mdoc, int ln, int pos, 301 const char *fmt, ...) 302{ 303 char buf[256]; 304 va_list ap; 305 306 if (NULL == mdoc->cb.mdoc_err) 307 return(0); 308 309 va_start(ap, fmt); 310 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 311 va_end(ap); 312 313 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 314} 315 316 317int 318mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 
319{ 320 char buf[256]; 321 va_list ap; 322 323 if (NULL == mdoc->cb.mdoc_warn) 324 return(0); 325 326 va_start(ap, fmt); 327 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 328 va_end(ap); 329 330 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf)); 331} 332 333 334int 335mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type) 336{ 337 const char *p; 338 339 p = __mdoc_merrnames[(int)type]; 340 assert(p); 341 342 if (iserr) 343 return(mdoc_verr(m, line, pos, p)); 344 345 return(mdoc_vwarn(m, line, pos, p)); 346} 347 348 349int 350mdoc_macro(struct mdoc *m, int tok, 351 int ln, int pp, int *pos, char *buf) 352{ 353 354 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 355 MDOC_PBODY & m->flags) 356 return(mdoc_perr(m, ln, pp, EPROLBODY)); 357 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 358 ! (MDOC_PBODY & m->flags)) 359 return(mdoc_perr(m, ln, pp, EBODYPROL)); 360 361 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 362 return(mdoc_perr(m, ln, pp, ENOCALL)); 363 364 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 365} 366 367 368static int 369node_append(struct mdoc *mdoc, struct mdoc_node *p) 370{ 371 372 assert(mdoc->last); 373 assert(mdoc->first); 374 assert(MDOC_ROOT != p->type); 375 376 switch (mdoc->next) { 377 case (MDOC_NEXT_SIBLING): 378 mdoc->last->next = p; 379 p->prev = mdoc->last; 380 p->parent = mdoc->last->parent; 381 break; 382 case (MDOC_NEXT_CHILD): 383 mdoc->last->child = p; 384 p->parent = mdoc->last; 385 break; 386 default: 387 abort(); 388 /* NOTREACHED */ 389 } 390 391 p->parent->nchild++; 392 393 if ( ! mdoc_valid_pre(mdoc, p)) 394 return(0); 395 if ( ! 
mdoc_action_pre(mdoc, p)) 396 return(0); 397 398 switch (p->type) { 399 case (MDOC_HEAD): 400 assert(MDOC_BLOCK == p->parent->type); 401 p->parent->head = p; 402 break; 403 case (MDOC_TAIL): 404 assert(MDOC_BLOCK == p->parent->type); 405 p->parent->tail = p; 406 break; 407 case (MDOC_BODY): 408 assert(MDOC_BLOCK == p->parent->type); 409 p->parent->body = p; 410 break; 411 default: 412 break; 413 } 414 415 mdoc->last = p; 416 417 switch (p->type) { 418 case (MDOC_TEXT): 419 if ( ! mdoc_valid_post(mdoc)) 420 return(0); 421 if ( ! mdoc_action_post(mdoc)) 422 return(0); 423 break; 424 default: 425 break; 426 } 427 428 return(1); 429} 430 431 432static struct mdoc_node * 433node_alloc(struct mdoc *m, int line, 434 int pos, int tok, enum mdoc_type type) 435{ 436 struct mdoc_node *p; 437 438 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 439 (void)mdoc_nerr(m, m->last, EMALLOC); 440 return(NULL); 441 } 442 443 p->sec = m->lastsec; 444 p->line = line; 445 p->pos = pos; 446 p->tok = tok; 447 if (MDOC_TEXT != (p->type = type)) 448 assert(p->tok >= 0); 449 450 return(p); 451} 452 453 454int 455mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok) 456{ 457 struct mdoc_node *p; 458 459 p = node_alloc(m, line, pos, tok, MDOC_TAIL); 460 if (NULL == p) 461 return(0); 462 return(node_append(m, p)); 463} 464 465 466int 467mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok) 468{ 469 struct mdoc_node *p; 470 471 assert(m->first); 472 assert(m->last); 473 474 p = node_alloc(m, line, pos, tok, MDOC_HEAD); 475 if (NULL == p) 476 return(0); 477 return(node_append(m, p)); 478} 479 480 481int 482mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok) 483{ 484 struct mdoc_node *p; 485 486 p = node_alloc(m, line, pos, tok, MDOC_BODY); 487 if (NULL == p) 488 return(0); 489 return(node_append(m, p)); 490} 491 492 493int 494mdoc_block_alloc(struct mdoc *m, int line, int pos, 495 int tok, struct mdoc_arg *args) 496{ 497 struct mdoc_node *p; 498 499 p = node_alloc(m, 
line, pos, tok, MDOC_BLOCK); 500 if (NULL == p) 501 return(0); 502 p->args = args; 503 if (p->args) 504 (args->refcnt)++; 505 return(node_append(m, p)); 506} 507 508 509int 510mdoc_elem_alloc(struct mdoc *m, int line, int pos, 511 int tok, struct mdoc_arg *args) 512{ 513 struct mdoc_node *p; 514 515 p = node_alloc(m, line, pos, tok, MDOC_ELEM); 516 if (NULL == p) 517 return(0); 518 p->args = args; 519 if (p->args) 520 (args->refcnt)++; 521 return(node_append(m, p)); 522} 523 524 525static int 526pstring(struct mdoc *m, int line, int pos, const char *p, size_t len) 527{ 528 struct mdoc_node *n; 529 size_t sv; 530 531 n = node_alloc(m, line, pos, -1, MDOC_TEXT); 532 if (NULL == n) 533 return(mdoc_nerr(m, m->last, EMALLOC)); 534 535 n->string = malloc(len + 1); 536 if (NULL == n->string) { 537 free(n); 538 return(mdoc_nerr(m, m->last, EMALLOC)); 539 } 540 541 sv = strlcpy(n->string, p, len + 1); 542 543 /* Prohibit truncation. */ 544 assert(sv < len + 1); 545 546 return(node_append(m, n)); 547} 548 549 550int 551mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 552{ 553 554 return(pstring(m, line, pos, p, strlen(p))); 555} 556 557 558void 559mdoc_node_free(struct mdoc_node *p) 560{ 561 562 if (p->parent) 563 p->parent->nchild--; 564 if (p->string) 565 free(p->string); 566 if (p->args) 567 mdoc_argv_free(p->args); 568 free(p); 569} 570 571 572void 573mdoc_node_freelist(struct mdoc_node *p) 574{ 575 576 if (p->child) 577 mdoc_node_freelist(p->child); 578 if (p->next) 579 mdoc_node_freelist(p->next); 580 581 assert(0 == p->nchild); 582 mdoc_node_free(p); 583} 584 585 586/* 587 * Parse free-form text, that is, a line that does not begin with the 588 * control character. 
589 */ 590static int 591parsetext(struct mdoc *m, int line, char *buf) 592{ 593 int i, j; 594 595 if (SEC_NONE == m->lastnamed) 596 return(mdoc_perr(m, line, 0, ETEXTPROL)); 597 598 /* 599 * If in literal mode, then pass the buffer directly to the 600 * back-end, as it should be preserved as a single term. 601 */ 602 603 if (MDOC_LITERAL & m->flags) { 604 if ( ! mdoc_word_alloc(m, line, 0, buf)) 605 return(0); 606 m->next = MDOC_NEXT_SIBLING; 607 return(1); 608 } 609 610 /* Disallow blank/white-space lines in non-literal mode. */ 611 612 for (i = 0; ' ' == buf[i]; i++) 613 /* Skip leading whitespace. */ ; 614 if (0 == buf[i]) 615 return(mdoc_perr(m, line, 0, ENOBLANK)); 616 617 /* 618 * Break apart a free-form line into tokens. Spaces are 619 * stripped out of the input. 620 */ 621 622 for (j = i; buf[i]; i++) { 623 if (' ' != buf[i]) 624 continue; 625 626 /* Escaped whitespace. */ 627 if (i && ' ' == buf[i] && '\\' == buf[i - 1]) 628 continue; 629 630 buf[i++] = 0; 631 if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 632 return(0); 633 m->next = MDOC_NEXT_SIBLING; 634 635 for ( ; ' ' == buf[i]; i++) 636 /* Skip trailing whitespace. */ ; 637 638 j = i; 639 if (0 == buf[i]) 640 break; 641 } 642 643 if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j))) 644 return(0); 645 646 m->next = MDOC_NEXT_SIBLING; 647 return(1); 648} 649 650 651 652 653static int 654macrowarn(struct mdoc *m, int ln, const char *buf) 655{ 656 if ( ! (MDOC_IGN_MACRO & m->pflags)) 657 return(mdoc_verr(m, ln, 1, 658 "unknown macro: %s%s", 659 buf, strlen(buf) > 3 ? "..." : "")); 660 return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s", 661 buf, strlen(buf) > 3 ? "..." : "")); 662} 663 664 665/* 666 * Parse a macro line, that is, a line beginning with the control 667 * character. 668 */ 669int 670parsemacro(struct mdoc *m, int ln, char *buf) 671{ 672 int i, c; 673 char mac[5]; 674 675 /* Empty lines are ignored. 
*/ 676 677 if (0 == buf[1]) 678 return(1); 679 680 if (' ' == buf[1]) { 681 i = 2; 682 while (buf[i] && ' ' == buf[i]) 683 i++; 684 if (0 == buf[i]) 685 return(1); 686 return(mdoc_perr(m, ln, 1, ESPACE)); 687 } 688 689 /* Copy the first word into a nil-terminated buffer. */ 690 691 for (i = 1; i < 5; i++) { 692 if (0 == (mac[i - 1] = buf[i])) 693 break; 694 else if (' ' == buf[i]) 695 break; 696 } 697 698 mac[i - 1] = 0; 699 700 if (i == 5 || i <= 2) { 701 if ( ! macrowarn(m, ln, mac)) 702 goto err; 703 return(1); 704 } 705 706 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 707 if ( ! macrowarn(m, ln, mac)) 708 goto err; 709 return(1); 710 } 711 712 /* The macro is sane. Jump to the next word. */ 713 714 while (buf[i] && ' ' == buf[i]) 715 i++; 716 717 /* Begin recursive parse sequence. */ 718 719 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 720 goto err; 721 722 return(1); 723 724err: /* Error out. */ 725 726 m->flags |= MDOC_HALT; 727 return(0); 728} 729