mdoc.c revision 1.2
1/* $Id: mdoc.c,v 1.2 2009/04/15 20:10:20 miod Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the 7 * above copyright notice and this permission notice appear in all 8 * copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 * PERFORMANCE OF THIS SOFTWARE. 18 */ 19#include <assert.h> 20#include <ctype.h> 21#include <stdarg.h> 22#include <stdio.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include "libmdoc.h" 27 28enum merr { 29 ENOCALL, 30 EBODYPROL, 31 EPROLBODY, 32 ESPACE, 33 ETEXTPROL, 34 ENOBLANK, 35 EMALLOC 36}; 37 38const char *const __mdoc_macronames[MDOC_MAX] = { 39 "\\\"", "Dd", "Dt", "Os", 40 "Sh", "Ss", "Pp", "D1", 41 "Dl", "Bd", "Ed", "Bl", 42 "El", "It", "Ad", "An", 43 "Ar", "Cd", "Cm", "Dv", 44 "Er", "Ev", "Ex", "Fa", 45 "Fd", "Fl", "Fn", "Ft", 46 "Ic", "In", "Li", "Nd", 47 "Nm", "Op", "Ot", "Pa", 48 "Rv", "St", "Va", "Vt", 49 /* LINTED */ 50 "Xr", "\%A", "\%B", "\%D", 51 /* LINTED */ 52 "\%I", "\%J", "\%N", "\%O", 53 /* LINTED */ 54 "\%P", "\%R", "\%T", "\%V", 55 "Ac", "Ao", "Aq", "At", 56 "Bc", "Bf", "Bo", "Bq", 57 "Bsx", "Bx", "Db", "Dc", 58 "Do", "Dq", "Ec", "Ef", 59 "Em", "Eo", "Fx", "Ms", 60 "No", "Ns", "Nx", "Ox", 61 "Pc", "Pf", "Po", "Pq", 62 "Qc", "Ql", "Qo", "Qq", 63 "Re", "Rs", "Sc", "So", 64 "Sq", "Sm", "Sx", "Sy", 65 "Tn", "Ux", "Xc", "Xo", 66 "Fo", "Fc", "Oo", "Oc", 67 "Bk", "Ek", "Bt", "Hf", 68 "Fr", "Ud", "Lb", "Ap", 69 "Lp", "Lk", "Mt", "Brq", 70 /* LINTED */ 71 "Bro", "Brc", "\%C", "Es", 72 /* LINTED */ 73 "En", "Dx", "\%Q" 74 }; 75 76const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77 "split", "nosplit", "ragged", 78 "unfilled", "literal", "file", 79 "offset", "bullet", "dash", 80 "hyphen", "item", "enum", 81 "tag", "diag", "hang", 82 "ohang", "inset", "column", 83 "width", "compact", "std", 84 "filled", "words", "emphasis", 85 "symbolic", "nested" 86 }; 87 88const char * const *mdoc_macronames = __mdoc_macronames; 89const char * const *mdoc_argnames = __mdoc_argnames; 90 91static void mdoc_free1(struct mdoc *); 92static int mdoc_alloc1(struct mdoc *); 93static struct mdoc_node *node_alloc(struct mdoc *, int, int, 94 int, enum mdoc_type); 95static int node_append(struct mdoc *, 96 struct mdoc_node *); 97static int parsetext(struct mdoc *, int, char *); 98static int parsemacro(struct mdoc *, int, char *); 99static int macrowarn(struct mdoc *, int, const char *); 100static int perr(struct mdoc *, int, int, enum merr); 101 102#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t)) 103 104/* 105 * Get the first (root) node of the parse tree. 106 */ 107const struct mdoc_node * 108mdoc_node(const struct mdoc *m) 109{ 110 111 return(MDOC_HALT & m->flags ? NULL : m->first); 112} 113 114 115const struct mdoc_meta * 116mdoc_meta(const struct mdoc *m) 117{ 118 119 return(MDOC_HALT & m->flags ? NULL : &m->meta); 120} 121 122 123static void 124mdoc_free1(struct mdoc *mdoc) 125{ 126 127 if (mdoc->first) 128 mdoc_node_freelist(mdoc->first); 129 if (mdoc->meta.title) 130 free(mdoc->meta.title); 131 if (mdoc->meta.os) 132 free(mdoc->meta.os); 133 if (mdoc->meta.name) 134 free(mdoc->meta.name); 135 if (mdoc->meta.arch) 136 free(mdoc->meta.arch); 137 if (mdoc->meta.vol) 138 free(mdoc->meta.vol); 139} 140 141 142static int 143mdoc_alloc1(struct mdoc *mdoc) 144{ 145 146 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 147 mdoc->flags = 0; 148 mdoc->lastnamed = mdoc->lastsec = 0; 149 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 150 if (NULL == mdoc->last) 151 return(0); 152 153 mdoc->first = mdoc->last; 154 mdoc->last->type = MDOC_ROOT; 155 mdoc->next = MDOC_NEXT_CHILD; 156 return(1); 157} 158 159 160/* 161 * Free up all resources contributed by a parse: the node tree, 162 * meta-data and so on. Then reallocate the root node for another 163 * parse. 164 */ 165int 166mdoc_reset(struct mdoc *mdoc) 167{ 168 169 mdoc_free1(mdoc); 170 return(mdoc_alloc1(mdoc)); 171} 172 173 174/* 175 * Completely free up all resources. 176 */ 177void 178mdoc_free(struct mdoc *mdoc) 179{ 180 181 mdoc_free1(mdoc); 182 if (mdoc->htab) 183 mdoc_hash_free(mdoc->htab); 184 free(mdoc); 185} 186 187 188struct mdoc * 189mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 190{ 191 struct mdoc *p; 192 193 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 194 return(NULL); 195 if (cb) 196 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 197 198 p->data = data; 199 p->pflags = pflags; 200 201 if (NULL == (p->htab = mdoc_hash_alloc())) { 202 free(p); 203 return(NULL); 204 } else if (mdoc_alloc1(p)) 205 return(p); 206 207 free(p); 208 return(NULL); 209} 210 211 212/* 213 * Climb back up the parse tree, validating open scopes. Mostly calls 214 * through to macro_end in macro.c. 215 */ 216int 217mdoc_endparse(struct mdoc *m) 218{ 219 220 if (MDOC_HALT & m->flags) 221 return(0); 222 else if (mdoc_macroend(m)) 223 return(1); 224 m->flags |= MDOC_HALT; 225 return(0); 226} 227 228 229/* 230 * Main parse routine. Parses a single line -- really just hands off to 231 * the macro or text parser. 232 */ 233int 234mdoc_parseln(struct mdoc *m, int ln, char *buf) 235{ 236 237 /* If in error-mode, then we parse no more. */ 238 239 if (MDOC_HALT & m->flags) 240 return(0); 241 242 return('.' == *buf ? parsemacro(m, ln, buf) : 243 parsetext(m, ln, buf)); 244} 245 246 247void 248mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 249{ 250 char buf[256]; 251 va_list ap; 252 253 if (NULL == mdoc->cb.mdoc_msg) 254 return; 255 256 va_start(ap, fmt); 257 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 258 va_end(ap); 259 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf); 260} 261 262 263int 264mdoc_verr(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 265{ 266 char buf[256]; 267 va_list ap; 268 269 if (NULL == mdoc->cb.mdoc_err) 270 return(0); 271 272 va_start(ap, fmt); 273 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 274 va_end(ap); 275 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 276} 277 278 279int 280mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, 281 enum mdoc_warn type, const char *fmt, ...) 282{ 283 char buf[256]; 284 va_list ap; 285 286 if (NULL == mdoc->cb.mdoc_warn) 287 return(0); 288 289 va_start(ap, fmt); 290 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 291 va_end(ap); 292 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf)); 293} 294 295 296int 297mdoc_nwarn(struct mdoc *mdoc, const struct mdoc_node *node, enum mdoc_warn type, 298 const char *fmt, ...) 299{ 300 char buf[256]; 301 va_list ap; 302 303 if (NULL == mdoc->cb.mdoc_warn) 304 return(0); 305 306 va_start(ap, fmt); 307 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 308 va_end(ap); 309 return((*mdoc->cb.mdoc_warn)(mdoc->data, node->line, node->pos, type, 310 buf)); 311} 312 313int 314mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...) 315{ 316 char buf[256]; 317 va_list ap; 318 319 if (NULL == mdoc->cb.mdoc_err) 320 return(0); 321 322 va_start(ap, fmt); 323 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 324 va_end(ap); 325 return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf)); 326} 327 328 329int 330mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...) 331{ 332 char buf[256]; 333 va_list ap; 334 335 if (NULL == mdoc->cb.mdoc_warn) 336 return(0); 337 338 va_start(ap, fmt); 339 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 340 va_end(ap); 341 return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line, 342 mdoc->last->pos, type, buf)); 343} 344 345 346int 347mdoc_err(struct mdoc *mdoc, const char *fmt, ...) 348{ 349 char buf[256]; 350 va_list ap; 351 352 if (NULL == mdoc->cb.mdoc_err) 353 return(0); 354 355 va_start(ap, fmt); 356 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 357 va_end(ap); 358 return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line, 359 mdoc->last->pos, buf)); 360} 361 362 363void 364mdoc_msg(struct mdoc *mdoc, const char *fmt, ...) 365{ 366 char buf[256]; 367 va_list ap; 368 369 if (NULL == mdoc->cb.mdoc_msg) 370 return; 371 372 va_start(ap, fmt); 373 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 374 va_end(ap); 375 (*mdoc->cb.mdoc_msg)(mdoc->data, mdoc->last->line, mdoc->last->pos, 376 buf); 377} 378 379 380void 381mdoc_pmsg(struct mdoc *mdoc, int line, int pos, const char *fmt, ...) 382{ 383 char buf[256]; 384 va_list ap; 385 386 if (NULL == mdoc->cb.mdoc_msg) 387 return; 388 389 va_start(ap, fmt); 390 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 391 va_end(ap); 392 (*mdoc->cb.mdoc_msg)(mdoc->data, line, pos, buf); 393} 394 395 396int 397mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type, 398 const char *fmt, ...) 399{ 400 char buf[256]; 401 va_list ap; 402 403 if (NULL == mdoc->cb.mdoc_warn) 404 return(0); 405 406 va_start(ap, fmt); 407 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 408 va_end(ap); 409 return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf)); 410} 411 412int 413mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...) 414{ 415 char buf[256]; 416 va_list ap; 417 418 if (NULL == mdoc->cb.mdoc_err) 419 return(0); 420 421 va_start(ap, fmt); 422 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 423 va_end(ap); 424 return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf)); 425} 426 427 428int 429mdoc_macro(struct mdoc *m, int tok, 430 int ln, int pp, int *pos, char *buf) 431{ 432 433 /* FIXME - these should happen during validation. */ 434 435 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 436 SEC_PROLOGUE != m->lastnamed) 437 return(perr(m, ln, pp, EPROLBODY)); 438 439 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 440 SEC_PROLOGUE == m->lastnamed) 441 return(perr(m, ln, pp, EBODYPROL)); 442 443 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 444 return(perr(m, ln, pp, ENOCALL)); 445 446 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 447} 448 449 450static int 451perr(struct mdoc *m, int line, int pos, enum merr type) 452{ 453 char *p; 454 455 p = NULL; 456 switch (type) { 457 case (ENOCALL): 458 p = "not callable"; 459 break; 460 case (EPROLBODY): 461 p = "macro disallowed in document body"; 462 break; 463 case (EBODYPROL): 464 p = "macro disallowed in document prologue"; 465 break; 466 case (EMALLOC): 467 p = "memory exhausted"; 468 break; 469 case (ETEXTPROL): 470 p = "text disallowed in document prologue"; 471 break; 472 case (ENOBLANK): 473 p = "blank lines disallowed in non-literal contexts"; 474 break; 475 case (ESPACE): 476 p = "whitespace disallowed after delimiter"; 477 break; 478 } 479 assert(p); 480 return(mdoc_perr(m, line, pos, p)); 481} 482 483 484static int 485node_append(struct mdoc *mdoc, struct mdoc_node *p) 486{ 487 488 assert(mdoc->last); 489 assert(mdoc->first); 490 assert(MDOC_ROOT != p->type); 491 492 switch (mdoc->next) { 493 case (MDOC_NEXT_SIBLING): 494 mdoc->last->next = p; 495 p->prev = mdoc->last; 496 p->parent = mdoc->last->parent; 497 break; 498 case (MDOC_NEXT_CHILD): 499 mdoc->last->child = p; 500 p->parent = mdoc->last; 501 break; 502 default: 503 abort(); 504 /* NOTREACHED */ 505 } 506 507 if ( ! mdoc_valid_pre(mdoc, p)) 508 return(0); 509 if ( ! mdoc_action_pre(mdoc, p)) 510 return(0); 511 512 switch (p->type) { 513 case (MDOC_HEAD): 514 assert(MDOC_BLOCK == p->parent->type); 515 p->parent->head = p; 516 break; 517 case (MDOC_TAIL): 518 assert(MDOC_BLOCK == p->parent->type); 519 p->parent->tail = p; 520 break; 521 case (MDOC_BODY): 522 assert(MDOC_BLOCK == p->parent->type); 523 p->parent->body = p; 524 break; 525 default: 526 break; 527 } 528 529 mdoc->last = p; 530 531 switch (p->type) { 532 case (MDOC_TEXT): 533 if ( ! mdoc_valid_post(mdoc)) 534 return(0); 535 if ( ! mdoc_action_post(mdoc)) 536 return(0); 537 break; 538 default: 539 break; 540 } 541 542 return(1); 543} 544 545 546static struct mdoc_node * 547node_alloc(struct mdoc *mdoc, int line, 548 int pos, int tok, enum mdoc_type type) 549{ 550 struct mdoc_node *p; 551 552 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 553 (void)verr(mdoc, EMALLOC); 554 return(NULL); 555 } 556 557 p->sec = mdoc->lastsec; 558 p->line = line; 559 p->pos = pos; 560 p->tok = tok; 561 if (MDOC_TEXT != (p->type = type)) 562 assert(p->tok >= 0); 563 564 return(p); 565} 566 567 568int 569mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) 570{ 571 struct mdoc_node *p; 572 573 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 574 if (NULL == p) 575 return(0); 576 return(node_append(mdoc, p)); 577} 578 579 580int 581mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok) 582{ 583 struct mdoc_node *p; 584 585 assert(mdoc->first); 586 assert(mdoc->last); 587 588 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 589 if (NULL == p) 590 return(0); 591 return(node_append(mdoc, p)); 592} 593 594 595int 596mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok) 597{ 598 struct mdoc_node *p; 599 600 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 601 if (NULL == p) 602 return(0); 603 return(node_append(mdoc, p)); 604} 605 606 607int 608mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 609 int tok, struct mdoc_arg *args) 610{ 611 struct mdoc_node *p; 612 613 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 614 if (NULL == p) 615 return(0); 616 if ((p->args = args)) 617 (args->refcnt)++; 618 return(node_append(mdoc, p)); 619} 620 621 622int 623mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 624 int tok, struct mdoc_arg *args) 625{ 626 struct mdoc_node *p; 627 628 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 629 if (NULL == p) 630 return(0); 631 if ((p->args = args)) 632 (args->refcnt)++; 633 return(node_append(mdoc, p)); 634} 635 636 637int 638mdoc_word_alloc(struct mdoc *mdoc, 639 int line, int pos, const char *word) 640{ 641 struct mdoc_node *p; 642 643 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT); 644 if (NULL == p) 645 return(0); 646 if (NULL == (p->string = strdup(word))) { 647 (void)verr(mdoc, EMALLOC); 648 return(0); 649 } 650 return(node_append(mdoc, p)); 651} 652 653 654void 655mdoc_node_free(struct mdoc_node *p) 656{ 657 658 if (p->string) 659 free(p->string); 660 if (p->args) 661 mdoc_argv_free(p->args); 662 free(p); 663} 664 665 666void 667mdoc_node_freelist(struct mdoc_node *p) 668{ 669 670 if (p->child) 671 mdoc_node_freelist(p->child); 672 if (p->next) 673 mdoc_node_freelist(p->next); 674 675 mdoc_node_free(p); 676} 677 678 679/* 680 * Parse free-form text, that is, a line that does not begin with the 681 * control character. 682 */ 683static int 684parsetext(struct mdoc *m, int line, char *buf) 685{ 686 687 if (SEC_PROLOGUE == m->lastnamed) 688 return(perr(m, line, 0, ETEXTPROL)); 689 690 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags)) 691 return(perr(m, line, 0, ENOBLANK)); 692 693 if ( ! mdoc_word_alloc(m, line, 0, buf)) 694 return(0); 695 696 m->next = MDOC_NEXT_SIBLING; 697 return(1); 698} 699 700 701static int 702macrowarn(struct mdoc *m, int ln, const char *buf) 703{ 704 if ( ! (MDOC_IGN_MACRO & m->pflags)) 705 return(mdoc_perr(m, ln, 1, 706 "unknown macro: %s%s", 707 buf, strlen(buf) > 3 ? "..." : "")); 708 return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, 709 "unknown macro: %s%s", 710 buf, strlen(buf) > 3 ? "..." : "")); 711} 712 713 714 715/* 716 * Parse a macro line, that is, a line beginning with the control 717 * character. 718 */ 719int 720parsemacro(struct mdoc *m, int ln, char *buf) 721{ 722 int i, c; 723 char mac[5]; 724 725 /* Comments and empties are quickly ignored. */ 726 727 if (0 == buf[1]) 728 return(1); 729 730 if (' ' == buf[1]) { 731 i = 2; 732 while (buf[i] && ' ' == buf[i]) 733 i++; 734 if (0 == buf[i]) 735 return(1); 736 return(perr(m, ln, 1, ESPACE)); 737 } 738 739 if (buf[1] && '\\' == buf[1]) 740 if (buf[2] && '\"' == buf[2]) 741 return(1); 742 743 /* Copy the first word into a nil-terminated buffer. */ 744 745 for (i = 1; i < 5; i++) { 746 if (0 == (mac[i - 1] = buf[i])) 747 break; 748 else if (' ' == buf[i]) 749 break; 750 } 751 752 mac[i - 1] = 0; 753 754 if (i == 5 || i <= 2) { 755 if ( ! macrowarn(m, ln, mac)) 756 goto err; 757 return(1); 758 } 759 760 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 761 if ( ! macrowarn(m, ln, mac)) 762 goto err; 763 return(1); 764 } 765 766 /* The macro is sane. Jump to the next word. */ 767 768 while (buf[i] && ' ' == buf[i]) 769 i++; 770 771 /* Begin recursive parse sequence. */ 772 773 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 774 goto err; 775 776 return(1); 777 778err: /* Error out. */ 779 780 m->flags |= MDOC_HALT; 781 return(0); 782} 783