mdoc.c revision 1.4
1/* $Id: mdoc.c,v 1.4 2009/06/15 02:19:32 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17#include <assert.h> 18#include <ctype.h> 19#include <stdarg.h> 20#include <stdio.h> 21#include <stdlib.h> 22#include <string.h> 23 24#include "libmdoc.h" 25 26enum merr { 27 ENOCALL, 28 EBODYPROL, 29 EPROLBODY, 30 ESPACE, 31 ETEXTPROL, 32 ENOBLANK, 33 EMALLOC 34}; 35 36const char *const __mdoc_macronames[MDOC_MAX] = { 37 "\\\"", "Dd", "Dt", "Os", 38 "Sh", "Ss", "Pp", "D1", 39 "Dl", "Bd", "Ed", "Bl", 40 "El", "It", "Ad", "An", 41 "Ar", "Cd", "Cm", "Dv", 42 "Er", "Ev", "Ex", "Fa", 43 "Fd", "Fl", "Fn", "Ft", 44 "Ic", "In", "Li", "Nd", 45 "Nm", "Op", "Ot", "Pa", 46 "Rv", "St", "Va", "Vt", 47 /* LINTED */ 48 "Xr", "\%A", "\%B", "\%D", 49 /* LINTED */ 50 "\%I", "\%J", "\%N", "\%O", 51 /* LINTED */ 52 "\%P", "\%R", "\%T", "\%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Ap", 67 "Lp", "Lk", "Mt", "Brq", 68 /* LINTED */ 69 "Bro", "Brc", "\%C", "Es", 70 /* LINTED */ 71 "En", "Dx", "\%Q" 72 }; 73 74const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 75 "split", "nosplit", "ragged", 76 "unfilled", "literal", "file", 77 "offset", "bullet", "dash", 78 "hyphen", "item", "enum", 79 "tag", "diag", "hang", 80 "ohang", "inset", "column", 81 "width", "compact", "std", 82 "filled", "words", "emphasis", 83 "symbolic", "nested" 84 }; 85 86const char * const *mdoc_macronames = __mdoc_macronames; 87const char * const *mdoc_argnames = __mdoc_argnames; 88 89static void mdoc_free1(struct mdoc *); 90static int mdoc_alloc1(struct mdoc *); 91static struct mdoc_node *node_alloc(struct mdoc *, int, int, 92 int, enum mdoc_type); 93static int node_append(struct mdoc *, 94 struct mdoc_node *); 95static int parsetext(struct mdoc *, int, char *); 96static int parsemacro(struct mdoc *, int, char *); 97static int macrowarn(struct mdoc *, int, const char *); 98static int perr(struct mdoc *, int, int, enum merr); 99 100#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t)) 101 102/* 103 * Get the first (root) node of the parse tree. 104 */ 105const struct mdoc_node * 106mdoc_node(const struct mdoc *m) 107{ 108 109 return(MDOC_HALT & m->flags ? NULL : m->first); 110} 111 112 113const struct mdoc_meta * 114mdoc_meta(const struct mdoc *m) 115{ 116 117 return(MDOC_HALT & m->flags ? NULL : &m->meta); 118} 119 120 121static void 122mdoc_free1(struct mdoc *mdoc) 123{ 124 125 if (mdoc->first) 126 mdoc_node_freelist(mdoc->first); 127 if (mdoc->meta.title) 128 free(mdoc->meta.title); 129 if (mdoc->meta.os) 130 free(mdoc->meta.os); 131 if (mdoc->meta.name) 132 free(mdoc->meta.name); 133 if (mdoc->meta.arch) 134 free(mdoc->meta.arch); 135 if (mdoc->meta.vol) 136 free(mdoc->meta.vol); 137} 138 139 140static int 141mdoc_alloc1(struct mdoc *mdoc) 142{ 143 144 bzero(&mdoc->meta, sizeof(struct mdoc_meta)); 145 mdoc->flags = 0; 146 mdoc->lastnamed = mdoc->lastsec = 0; 147 mdoc->last = calloc(1, sizeof(struct mdoc_node)); 148 if (NULL == mdoc->last) 149 return(0); 150 151 mdoc->first = mdoc->last; 152 mdoc->last->type = MDOC_ROOT; 153 mdoc->next = MDOC_NEXT_CHILD; 154 return(1); 155} 156 157 158/* 159 * Free up all resources contributed by a parse: the node tree, 160 * meta-data and so on. Then reallocate the root node for another 161 * parse. 162 */ 163int 164mdoc_reset(struct mdoc *mdoc) 165{ 166 167 mdoc_free1(mdoc); 168 return(mdoc_alloc1(mdoc)); 169} 170 171 172/* 173 * Completely free up all resources. 174 */ 175void 176mdoc_free(struct mdoc *mdoc) 177{ 178 179 mdoc_free1(mdoc); 180 if (mdoc->htab) 181 mdoc_hash_free(mdoc->htab); 182 free(mdoc); 183} 184 185 186struct mdoc * 187mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb) 188{ 189 struct mdoc *p; 190 191 if (NULL == (p = calloc(1, sizeof(struct mdoc)))) 192 return(NULL); 193 if (cb) 194 (void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb)); 195 196 p->data = data; 197 p->pflags = pflags; 198 199 if (NULL == (p->htab = mdoc_hash_alloc())) { 200 free(p); 201 return(NULL); 202 } else if (mdoc_alloc1(p)) 203 return(p); 204 205 free(p); 206 return(NULL); 207} 208 209 210/* 211 * Climb back up the parse tree, validating open scopes. Mostly calls 212 * through to macro_end in macro.c. 213 */ 214int 215mdoc_endparse(struct mdoc *m) 216{ 217 218 if (MDOC_HALT & m->flags) 219 return(0); 220 else if (mdoc_macroend(m)) 221 return(1); 222 m->flags |= MDOC_HALT; 223 return(0); 224} 225 226 227/* 228 * Main parse routine. Parses a single line -- really just hands off to 229 * the macro or text parser. 230 */ 231int 232mdoc_parseln(struct mdoc *m, int ln, char *buf) 233{ 234 235 /* If in error-mode, then we parse no more. */ 236 237 if (MDOC_HALT & m->flags) 238 return(0); 239 240 return('.' == *buf ? parsemacro(m, ln, buf) : 241 parsetext(m, ln, buf)); 242} 243 244 245void 246mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...) 247{ 248 char buf[256]; 249 va_list ap; 250 251 if (NULL == mdoc->cb.mdoc_msg) 252 return; 253 254 va_start(ap, fmt); 255 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 256 va_end(ap); 257 (*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf); 258} 259 260 261int 262mdoc_verr(struct mdoc *mdoc, int ln, int pos, 263 const char *fmt, ...) 264{ 265 char buf[256]; 266 va_list ap; 267 268 if (NULL == mdoc->cb.mdoc_err) 269 return(0); 270 271 va_start(ap, fmt); 272 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 273 va_end(ap); 274 return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf)); 275} 276 277 278int 279mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, 280 enum mdoc_warn type, const char *fmt, ...) 281{ 282 char buf[256]; 283 va_list ap; 284 285 if (NULL == mdoc->cb.mdoc_warn) 286 return(0); 287 288 va_start(ap, fmt); 289 (void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 290 va_end(ap); 291 return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf)); 292} 293 294 295int 296mdoc_macro(struct mdoc *m, int tok, 297 int ln, int pp, int *pos, char *buf) 298{ 299 300 /* FIXME - these should happen during validation. */ 301 302 if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 303 SEC_PROLOGUE != m->lastnamed) 304 return(perr(m, ln, pp, EPROLBODY)); 305 306 if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 307 SEC_PROLOGUE == m->lastnamed) 308 return(perr(m, ln, pp, EBODYPROL)); 309 310 if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) 311 return(perr(m, ln, pp, ENOCALL)); 312 313 return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf)); 314} 315 316 317static int 318perr(struct mdoc *m, int line, int pos, enum merr type) 319{ 320 char *p; 321 322 p = NULL; 323 switch (type) { 324 case (ENOCALL): 325 p = "not callable"; 326 break; 327 case (EPROLBODY): 328 p = "macro disallowed in document body"; 329 break; 330 case (EBODYPROL): 331 p = "macro disallowed in document prologue"; 332 break; 333 case (EMALLOC): 334 p = "memory exhausted"; 335 break; 336 case (ETEXTPROL): 337 p = "text disallowed in document prologue"; 338 break; 339 case (ENOBLANK): 340 p = "blank lines disallowed in non-literal contexts"; 341 break; 342 case (ESPACE): 343 p = "whitespace disallowed after delimiter"; 344 break; 345 } 346 assert(p); 347 return(mdoc_perr(m, line, pos, p)); 348} 349 350 351static int 352node_append(struct mdoc *mdoc, struct mdoc_node *p) 353{ 354 355 assert(mdoc->last); 356 assert(mdoc->first); 357 assert(MDOC_ROOT != p->type); 358 359 switch (mdoc->next) { 360 case (MDOC_NEXT_SIBLING): 361 mdoc->last->next = p; 362 p->prev = mdoc->last; 363 p->parent = mdoc->last->parent; 364 break; 365 case (MDOC_NEXT_CHILD): 366 mdoc->last->child = p; 367 p->parent = mdoc->last; 368 break; 369 default: 370 abort(); 371 /* NOTREACHED */ 372 } 373 374 if ( ! mdoc_valid_pre(mdoc, p)) 375 return(0); 376 if ( ! mdoc_action_pre(mdoc, p)) 377 return(0); 378 379 switch (p->type) { 380 case (MDOC_HEAD): 381 assert(MDOC_BLOCK == p->parent->type); 382 p->parent->head = p; 383 break; 384 case (MDOC_TAIL): 385 assert(MDOC_BLOCK == p->parent->type); 386 p->parent->tail = p; 387 break; 388 case (MDOC_BODY): 389 assert(MDOC_BLOCK == p->parent->type); 390 p->parent->body = p; 391 break; 392 default: 393 break; 394 } 395 396 mdoc->last = p; 397 398 switch (p->type) { 399 case (MDOC_TEXT): 400 if ( ! mdoc_valid_post(mdoc)) 401 return(0); 402 if ( ! mdoc_action_post(mdoc)) 403 return(0); 404 break; 405 default: 406 break; 407 } 408 409 return(1); 410} 411 412 413static struct mdoc_node * 414node_alloc(struct mdoc *mdoc, int line, 415 int pos, int tok, enum mdoc_type type) 416{ 417 struct mdoc_node *p; 418 419 if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) { 420 (void)verr(mdoc, EMALLOC); 421 return(NULL); 422 } 423 424 p->sec = mdoc->lastsec; 425 p->line = line; 426 p->pos = pos; 427 p->tok = tok; 428 if (MDOC_TEXT != (p->type = type)) 429 assert(p->tok >= 0); 430 431 return(p); 432} 433 434 435int 436mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok) 437{ 438 struct mdoc_node *p; 439 440 p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 441 if (NULL == p) 442 return(0); 443 return(node_append(mdoc, p)); 444} 445 446 447int 448mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok) 449{ 450 struct mdoc_node *p; 451 452 assert(mdoc->first); 453 assert(mdoc->last); 454 455 p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 456 if (NULL == p) 457 return(0); 458 return(node_append(mdoc, p)); 459} 460 461 462int 463mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok) 464{ 465 struct mdoc_node *p; 466 467 p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 468 if (NULL == p) 469 return(0); 470 return(node_append(mdoc, p)); 471} 472 473 474int 475mdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 476 int tok, struct mdoc_arg *args) 477{ 478 struct mdoc_node *p; 479 480 p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 481 if (NULL == p) 482 return(0); 483 p->args = args; 484 if (p->args) 485 (args->refcnt)++; 486 return(node_append(mdoc, p)); 487} 488 489 490int 491mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 492 int tok, struct mdoc_arg *args) 493{ 494 struct mdoc_node *p; 495 496 p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 497 if (NULL == p) 498 return(0); 499 p->args = args; 500 if (p->args) 501 (args->refcnt)++; 502 return(node_append(mdoc, p)); 503} 504 505 506int 507mdoc_word_alloc(struct mdoc *mdoc, 508 int line, int pos, const char *word) 509{ 510 struct mdoc_node *p; 511 512 p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT); 513 if (NULL == p) 514 return(0); 515 if (NULL == (p->string = strdup(word))) { 516 (void)verr(mdoc, EMALLOC); 517 return(0); 518 } 519 return(node_append(mdoc, p)); 520} 521 522 523void 524mdoc_node_free(struct mdoc_node *p) 525{ 526 527 if (p->string) 528 free(p->string); 529 if (p->args) 530 mdoc_argv_free(p->args); 531 free(p); 532} 533 534 535void 536mdoc_node_freelist(struct mdoc_node *p) 537{ 538 539 if (p->child) 540 mdoc_node_freelist(p->child); 541 if (p->next) 542 mdoc_node_freelist(p->next); 543 544 mdoc_node_free(p); 545} 546 547 548/* 549 * Parse free-form text, that is, a line that does not begin with the 550 * control character. 551 */ 552static int 553parsetext(struct mdoc *m, int line, char *buf) 554{ 555 556 if (SEC_PROLOGUE == m->lastnamed) 557 return(perr(m, line, 0, ETEXTPROL)); 558 559 if (0 == buf[0] && ! (MDOC_LITERAL & m->flags)) 560 return(perr(m, line, 0, ENOBLANK)); 561 562 if ( ! mdoc_word_alloc(m, line, 0, buf)) 563 return(0); 564 565 m->next = MDOC_NEXT_SIBLING; 566 return(1); 567} 568 569 570static int 571macrowarn(struct mdoc *m, int ln, const char *buf) 572{ 573 if ( ! (MDOC_IGN_MACRO & m->pflags)) 574 return(mdoc_perr(m, ln, 1, 575 "unknown macro: %s%s", 576 buf, strlen(buf) > 3 ? "..." : "")); 577 return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX, 578 "unknown macro: %s%s", 579 buf, strlen(buf) > 3 ? "..." : "")); 580} 581 582 583 584/* 585 * Parse a macro line, that is, a line beginning with the control 586 * character. 587 */ 588int 589parsemacro(struct mdoc *m, int ln, char *buf) 590{ 591 int i, c; 592 char mac[5]; 593 594 /* Comments and empties are quickly ignored. */ 595 596 if (0 == buf[1]) 597 return(1); 598 599 if (' ' == buf[1]) { 600 i = 2; 601 while (buf[i] && ' ' == buf[i]) 602 i++; 603 if (0 == buf[i]) 604 return(1); 605 return(perr(m, ln, 1, ESPACE)); 606 } 607 608 if (buf[1] && '\\' == buf[1]) 609 if (buf[2] && '\"' == buf[2]) 610 return(1); 611 612 /* Copy the first word into a nil-terminated buffer. */ 613 614 for (i = 1; i < 5; i++) { 615 if (0 == (mac[i - 1] = buf[i])) 616 break; 617 else if (' ' == buf[i]) 618 break; 619 } 620 621 mac[i - 1] = 0; 622 623 if (i == 5 || i <= 2) { 624 if ( ! macrowarn(m, ln, mac)) 625 goto err; 626 return(1); 627 } 628 629 if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) { 630 if ( ! macrowarn(m, ln, mac)) 631 goto err; 632 return(1); 633 } 634 635 /* The macro is sane. Jump to the next word. */ 636 637 while (buf[i] && ' ' == buf[i]) 638 i++; 639 640 /* Begin recursive parse sequence. */ 641 642 if ( ! mdoc_macro(m, c, ln, 1, &i, buf)) 643 goto err; 644 645 return(1); 646 647err: /* Error out. */ 648 649 m->flags |= MDOC_HALT; 650 return(0); 651} 652