mdoc.c revision 1.3
1/* $Id: mdoc.c,v 1.3 2009/06/14 23:00:57 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <assert.h> 18#include <ctype.h> 19#include <stdarg.h> 20#include <stdio.h> 21#include <stdlib.h> 22#include <string.h> 23 24#include "libmdoc.h" 25 26enum merr { 27 ENOCALL, 28 EBODYPROL, 29 EPROLBODY, 30 ESPACE, 31 ETEXTPROL, 32 ENOBLANK, 33 EMALLOC 34}; 35 36const char *const __mdoc_macronames[MDOC_MAX] = { 37 "\\\"", "Dd", "Dt", "Os", 38 "Sh", "Ss", "Pp", "D1", 39 "Dl", "Bd", "Ed", "Bl", 40 "El", "It", "Ad", "An", 41 "Ar", "Cd", "Cm", "Dv", 42 "Er", "Ev", "Ex", "Fa", 43 "Fd", "Fl", "Fn", "Ft", 44 "Ic", "In", "Li", "Nd", 45 "Nm", "Op", "Ot", "Pa", 46 "Rv", "St", "Va", "Vt", 47 /* LINTED */ 48 "Xr", "\%A", "\%B", "\%D", 49 /* LINTED */ 50 "\%I", "\%J", "\%N", "\%O", 51 /* LINTED */ 52 "\%P", "\%R", "\%T", "\%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Ap", 67 "Lp", "Lk", "Mt", "Brq", 68 /* LINTED */ 69 "Bro", "Brc", "\%C", "Es", 70 /* LINTED */ 71 "En", "Dx", "\%Q" 72 }; 73 74const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 75 "split", "nosplit", "ragged", 76 "unfilled", "literal", "file", 77 "offset", "bullet", "dash", 78 "hyphen", "item", "enum", 79 "tag", "diag", "hang", 80 "ohang", "inset", "column", 81 "width", "compact", "std", 82 "filled", "words", "emphasis", 83 "symbolic", "nested" 84 }; 85 86const char * const *mdoc_macronames = __mdoc_macronames; 87const char * const *mdoc_argnames = __mdoc_argnames; 88 89static void mdoc_free1(struct mdoc *); 90static int mdoc_alloc1(struct mdoc *); 91static struct mdoc_node *node_alloc(struct mdoc *, int, int, 92 int, enum mdoc_type); 93static int node_append(struct mdoc *, 94 struct mdoc_node *); 95static int parsetext(struct mdoc *, int, char *); 96static int parsemacro(struct mdoc *, int, char *); 97static int macrowarn(struct mdoc *, int, const char *); 98static int perr(struct mdoc *, int, int, enum merr); 99 100#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t)) 101 102/* 103 * Get the first (root) node of the parse tree. 104 */ 105const struct mdoc_node * 106mdoc_node(const struct mdoc *m) 107{ 108 109 return(MDOC_HALT & m->flags ? NULL : m->first); 110} 111 112 113const struct mdoc_meta * 114mdoc_meta(const struct mdoc *m) 115{ 116 117 return(MDOC_HALT & m->flags ? NULL : &m->meta); 118} 119 120 121static void 122mdoc_free1(struct mdoc *mdoc) 123{ 124 125 if (mdoc->first) 126 mdoc_node_freelist(mdoc->first); 127 if (mdoc->meta.title) 128 free(mdoc->meta.title); 129 if (mdoc->meta.os) 130 free(mdoc->meta.os); 131 if (mdoc->meta.name) 132 free(mdoc->meta.name); 133 if (mdoc->meta.arch) 134 free(mdoc->meta.arch); 135 if (mdoc->meta.vol) 136 free(mdoc->meta.vol); 137} 138 139 140static int 141mdoc_alloc1(struct mdoc *mdoc) 142{ 143 144 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 145 mdoc->flags = 0; 146 mdoc->lastnamed = mdoc->lastsec = 0; 147 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 148 if (NULL == mdoc->last) 149 return(0); 150 151 mdoc->first = mdoc->last; 152 mdoc->last->type = MDOC_ROOT; 153 mdoc->next = MDOC_NEXT_CHILD; 154 return(1); 155} 156 157 158/* 159 * Free up all resources contributed by a parse: the node tree, 160 * meta-data and so on. Then reallocate the root node for another 161 * parse. 162 */ 163int 164mdoc_reset(struct mdoc *mdoc) 165{ 166 167 mdoc_free1(mdoc); 168 return(mdoc_alloc1(mdoc)); 169} 170 171 172/* 173 * Completely free up all resources. 174 */ 175void 176mdoc_free(struct mdoc *mdoc) 177{ 178 179 mdoc_free1(mdoc); 180 if (mdoc->htab) 181 mdoc_hash_free(mdoc->htab); 182 free(mdoc); 183} 184 185 186struct mdoc * 187mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 188{ 189 struct mdoc *p; 190 191 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 192 return(NULL); 193 if (cb) 194 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 195 196 p->data = data; 197 p->pflags = pflags; 198 199 if (NULL == (p->htab = mdoc_hash_alloc())) { 200 free(p); 201 return(NULL); 202 } else if (mdoc_alloc1(p)) 203 return(p); 204 205 free(p); 206 return(NULL); 207} 208 209 210/* 211 * Climb back up the parse tree, validating open scopes. Mostly calls 212 * through to macro_end in macro.c. 213 */ 214int 215mdoc_endparse(struct mdoc *m) 216{ 217 218 if (MDOC_HALT & m->flags) 219 return(0); 220 else if (mdoc_macroend(m)) 221 return(1); 222 m->flags |= MDOC_HALT; 223 return(0); 224} 225 226 227/* 228 * Main parse routine. Parses a single line -- really just hands off to 229 * the macro or text parser. 230 */ 231int 232mdoc_parseln(struct mdoc *m, int ln, char *buf) 233{ 234 235 /* If in error-mode, then we parse no more. */ 236 237 if (MDOC_HALT & m->flags) 238 return(0); 239 240 return('.' == *buf ? parsemacro(m, ln, buf) : 241 parsetext(m, ln, buf)); 242} 243 244 245void 246mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 247{ 248 char buf[256]; 249 va_list ap; 250 251 if (NULL == mdoc->cb.mdoc_msg) 252 return; 253 254 va_start(ap, fmt); 255 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 256 va_end(ap); 257 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf); 258} 259 260 261int 262mdoc_verr(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 263{ 264 char buf[256]; 265 va_list ap; 266 267 if (NULL == mdoc->cb.mdoc_err) 268 return(0); 269 270 va_start(ap, fmt); 271 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 272 va_end(ap); 273 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 274} 275 276 277int 278mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, 279 enum mdoc_warn type, const char *fmt, ...) 280{ 281 char buf[256]; 282 va_list ap; 283 284 if (NULL == mdoc->cb.mdoc_warn) 285 return(0); 286 287 va_start(ap, fmt); 288 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 289 va_end(ap); 290 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf)); 291} 292 293 294int 295mdoc_nwarn(struct mdoc *mdoc, const struct mdoc_node *node, enum mdoc_warn type, 296 const char *fmt, ...) 297{ 298 char buf[256]; 299 va_list ap; 300 301 if (NULL == mdoc->cb.mdoc_warn) 302 return(0); 303 304 va_start(ap, fmt); 305 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 306 va_end(ap); 307 return((*mdoc->cb.mdoc_warn)(mdoc->data, node->line, node->pos, type, 308 buf)); 309} 310 311int 312mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...) 313{ 314 char buf[256]; 315 va_list ap; 316 317 if (NULL == mdoc->cb.mdoc_err) 318 return(0); 319 320 va_start(ap, fmt); 321 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 322 va_end(ap); 323 return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf)); 324} 325 326 327int 328mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...) 329{ 330 char buf[256]; 331 va_list ap; 332 333 if (NULL == mdoc->cb.mdoc_warn) 334 return(0); 335 336 va_start(ap, fmt); 337 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 338 va_end(ap); 339 return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line, 340 mdoc->last->pos, type, buf)); 341} 342 343 344int 345mdoc_err(struct mdoc *mdoc, const char *fmt, ...) 346{ 347 char buf[256]; 348 va_list ap; 349 350 if (NULL == mdoc->cb.mdoc_err) 351 return(0); 352 353 va_start(ap, fmt); 354 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 355 va_end(ap); 356 return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line, 357 mdoc->last->pos, buf)); 358} 359 360 361void 362mdoc_msg(struct mdoc *mdoc, const char *fmt, ...) 363{ 364 char buf[256]; 365 va_list ap; 366 367 if (NULL == mdoc->cb.mdoc_msg) 368 return; 369 370 va_start(ap, fmt); 371 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 372 va_end(ap); 373 (*mdoc->cb.mdoc_msg)(mdoc->data, mdoc->last->line, mdoc->last->pos, 374 buf); 375} 376 377 378void 379mdoc_pmsg(struct mdoc *mdoc, int line, int pos, const char *fmt, ...) 380{ 381 char buf[256]; 382 va_list ap; 383 384 if (NULL == mdoc->cb.mdoc_msg) 385 return; 386 387 va_start(ap, fmt); 388 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 389 va_end(ap); 390 (*mdoc->cb.mdoc_msg)(mdoc->data, line, pos, buf); 391} 392 393 394int 395mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type, 396 const char *fmt, ...) 397{ 398 char buf[256]; 399 va_list ap; 400 401 if (NULL == mdoc->cb.mdoc_warn) 402 return(0); 403 404 va_start(ap, fmt); 405 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 406 va_end(ap); 407 return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf)); 408} 409 410int 411mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...) 412{ 413 char buf[256]; 414 va_list ap; 415 416 if (NULL == mdoc->cb.mdoc_err) 417 return(0); 418 419 va_start(ap, fmt); 420 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 421 va_end(ap); 422 return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf)); 423} 424 425 426int 427mdoc_macro(struct mdoc *m, int tok, 428 int ln, int pp, int *pos, char *buf) 429{ 430 431 /* FIXME - these should happen during validation. */ 432 433 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 434 SEC_PROLOGUE != m->lastnamed) 435 return(perr(m, ln, pp, EPROLBODY)); 436 437 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 438 SEC_PROLOGUE == m->lastnamed) 439 return(perr(m, ln, pp, EBODYPROL)); 440 441 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 442 return(perr(m, ln, pp, ENOCALL)); 443 444 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 445} 446 447 448static int 449perr(struct mdoc *m, int line, int pos, enum merr type) 450{ 451 char *p; 452 453 p = NULL; 454 switch (type) { 455 case (ENOCALL): 456 p = "not callable"; 457 break; 458 case (EPROLBODY): 459 p = "macro disallowed in document body"; 460 break; 461 case (EBODYPROL): 462 p = "macro disallowed in document prologue"; 463 break; 464 case (EMALLOC): 465 p = "memory exhausted"; 466 break; 467 case (ETEXTPROL): 468 p = "text disallowed in document prologue"; 469 break; 470 case (ENOBLANK): 471 p = "blank lines disallowed in non-literal contexts"; 472 break; 473 case (ESPACE): 474 p = "whitespace disallowed after delimiter"; 475 break; 476 } 477 assert(p); 478 return(mdoc_perr(m, line, pos, p)); 479} 480 481 482static int 483node_append(struct mdoc *mdoc, struct mdoc_node *p) 484{ 485 486 assert(mdoc->last); 487 assert(mdoc->first); 488 assert(MDOC_ROOT != p->type); 489 490 switch (mdoc->next) { 491 case (MDOC_NEXT_SIBLING): 492 mdoc->last->next = p; 493 p->prev = mdoc->last; 494 p->parent = mdoc->last->parent; 495 break; 496 case (MDOC_NEXT_CHILD): 497 mdoc->last->child = p; 498 p->parent = mdoc->last; 499 break; 500 default: 501 abort(); 502 /* NOTREACHED */ 503 } 504 505 if ( ! mdoc_valid_pre(mdoc, p)) 506 return(0); 507 if ( ! mdoc_action_pre(mdoc, p)) 508 return(0); 509 510 switch (p->type) { 511 case (MDOC_HEAD): 512 assert(MDOC_BLOCK == p->parent->type); 513 p->parent->head = p; 514 break; 515 case (MDOC_TAIL): 516 assert(MDOC_BLOCK == p->parent->type); 517 p->parent->tail = p; 518 break; 519 case (MDOC_BODY): 520 assert(MDOC_BLOCK == p->parent->type); 521 p->parent->body = p; 522 break; 523 default: 524 break; 525 } 526 527 mdoc->last = p; 528 529 switch (p->type) { 530 case (MDOC_TEXT): 531 if ( ! mdoc_valid_post(mdoc)) 532 return(0); 533 if ( ! mdoc_action_post(mdoc)) 534 return(0); 535 break; 536 default: 537 break; 538 } 539 540 return(1); 541} 542 543 544static struct mdoc_node * 545node_alloc(struct mdoc *mdoc, int line, 546 int pos, int tok, enum mdoc_type type) 547{ 548 struct mdoc_node *p; 549 550 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 551 (void)verr(mdoc, EMALLOC); 552 return(NULL); 553 } 554 555 p->sec = mdoc->lastsec; 556 p->line = line; 557 p->pos = pos; 558 p->tok = tok; 559 if (MDOC_TEXT != (p->type = type)) 560 assert(p->tok >= 0); 561 562 return(p); 563} 564 565 566int 567mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) 568{ 569 struct mdoc_node *p; 570 571 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 572 if (NULL == p) 573 return(0); 574 return(node_append(mdoc, p)); 575} 576 577 578int 579mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok) 580{ 581 struct mdoc_node *p; 582 583 assert(mdoc->first); 584 assert(mdoc->last); 585 586 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 587 if (NULL == p) 588 return(0); 589 return(node_append(mdoc, p)); 590} 591 592 593int 594mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok) 595{ 596 struct mdoc_node *p; 597 598 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 599 if (NULL == p) 600 return(0); 601 return(node_append(mdoc, p)); 602} 603 604 605int 606mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 607 int tok, struct mdoc_arg *args) 608{ 609 struct mdoc_node *p; 610 611 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 612 if (NULL == p) 613 return(0); 614 if ((p->args = args)) 615 (args->refcnt)++; 616 return(node_append(mdoc, p)); 617} 618 619 620int 621mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 622 int tok, struct mdoc_arg *args) 623{ 624 struct mdoc_node *p; 625 626 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 627 if (NULL == p) 628 return(0); 629 if ((p->args = args)) 630 (args->refcnt)++; 631 return(node_append(mdoc, p)); 632} 633 634 635int 636mdoc_word_alloc(struct mdoc *mdoc, 637 int line, int pos, const char *word) 638{ 639 struct mdoc_node *p; 640 641 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT); 642 if (NULL == p) 643 return(0); 644 if (NULL == (p->string = strdup(word))) { 645 (void)verr(mdoc, EMALLOC); 646 return(0); 647 } 648 return(node_append(mdoc, p)); 649} 650 651 652void 653mdoc_node_free(struct mdoc_node *p) 654{ 655 656 if (p->string) 657 free(p->string); 658 if (p->args) 659 mdoc_argv_free(p->args); 660 free(p); 661} 662 663 664void 665mdoc_node_freelist(struct mdoc_node *p) 666{ 667 668 if (p->child) 669 mdoc_node_freelist(p->child); 670 if (p->next) 671 mdoc_node_freelist(p->next); 672 673 mdoc_node_free(p); 674} 675 676 677/* 678 * Parse free-form text, that is, a line that does not begin with the 679 * control character. 680 */ 681static int 682parsetext(struct mdoc *m, int line, char *buf) 683{ 684 685 if (SEC_PROLOGUE == m->lastnamed) 686 return(perr(m, line, 0, ETEXTPROL)); 687 688 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags)) 689 return(perr(m, line, 0, ENOBLANK)); 690 691 if ( ! mdoc_word_alloc(m, line, 0, buf)) 692 return(0); 693 694 m->next = MDOC_NEXT_SIBLING; 695 return(1); 696} 697 698 699static int 700macrowarn(struct mdoc *m, int ln, const char *buf) 701{ 702 if ( ! (MDOC_IGN_MACRO & m->pflags)) 703 return(mdoc_perr(m, ln, 1, 704 "unknown macro: %s%s", 705 buf, strlen(buf) > 3 ? "..." : "")); 706 return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, 707 "unknown macro: %s%s", 708 buf, strlen(buf) > 3 ? "..." : "")); 709} 710 711 712 713/* 714 * Parse a macro line, that is, a line beginning with the control 715 * character. 716 */ 717int 718parsemacro(struct mdoc *m, int ln, char *buf) 719{ 720 int i, c; 721 char mac[5]; 722 723 /* Comments and empties are quickly ignored. */ 724 725 if (0 == buf[1]) 726 return(1); 727 728 if (' ' == buf[1]) { 729 i = 2; 730 while (buf[i] && ' ' == buf[i]) 731 i++; 732 if (0 == buf[i]) 733 return(1); 734 return(perr(m, ln, 1, ESPACE)); 735 } 736 737 if (buf[1] && '\\' == buf[1]) 738 if (buf[2] && '\"' == buf[2]) 739 return(1); 740 741 /* Copy the first word into a nil-terminated buffer. */ 742 743 for (i = 1; i < 5; i++) { 744 if (0 == (mac[i - 1] = buf[i])) 745 break; 746 else if (' ' == buf[i]) 747 break; 748 } 749 750 mac[i - 1] = 0; 751 752 if (i == 5 || i <= 2) { 753 if ( ! macrowarn(m, ln, mac)) 754 goto err; 755 return(1); 756 } 757 758 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 759 if ( ! macrowarn(m, ln, mac)) 760 goto err; 761 return(1); 762 } 763 764 /* The macro is sane. Jump to the next word. */ 765 766 while (buf[i] && ' ' == buf[i]) 767 i++; 768 769 /* Begin recursive parse sequence. */ 770 771 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 772 goto err; 773 774 return(1); 775 776err: /* Error out. */ 777 778 m->flags |= MDOC_HALT; 779 return(0); 780} 781