/* mdoc.c revision 261344 */
1261344Suqs/* $Id: mdoc.c,v 1.206 2013/12/24 19:11:46 schwarze Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4261344Suqs * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17241675Suqs */ 18241675Suqs#ifdef HAVE_CONFIG_H 19241675Suqs#include "config.h" 20241675Suqs#endif 21241675Suqs 22241675Suqs#include <sys/types.h> 23241675Suqs 24241675Suqs#include <assert.h> 25241675Suqs#include <stdarg.h> 26241675Suqs#include <stdio.h> 27241675Suqs#include <stdlib.h> 28241675Suqs#include <string.h> 29241675Suqs#include <time.h> 30241675Suqs 31241675Suqs#include "mdoc.h" 32241675Suqs#include "mandoc.h" 33241675Suqs#include "libmdoc.h" 34241675Suqs#include "libmandoc.h" 35241675Suqs 36241675Suqsconst char *const __mdoc_macronames[MDOC_MAX] = { 37241675Suqs "Ap", "Dd", "Dt", "Os", 38241675Suqs "Sh", "Ss", "Pp", "D1", 39241675Suqs "Dl", "Bd", "Ed", "Bl", 40241675Suqs "El", "It", "Ad", "An", 41241675Suqs "Ar", "Cd", "Cm", "Dv", 42241675Suqs "Er", "Ev", "Ex", "Fa", 43241675Suqs "Fd", "Fl", "Fn", "Ft", 44241675Suqs "Ic", "In", "Li", "Nd", 45241675Suqs "Nm", "Op", "Ot", "Pa", 46241675Suqs "Rv", "St", "Va", "Vt", 47241675Suqs /* LINTED */ 48241675Suqs "Xr", "%A", "%B", "%D", 49241675Suqs /* LINTED */ 50241675Suqs "%I", "%J", "%N", "%O", 51241675Suqs /* LINTED */ 52241675Suqs "%P", "%R", "%T", "%V", 53241675Suqs "Ac", "Ao", "Aq", "At", 54241675Suqs "Bc", "Bf", "Bo", "Bq", 55241675Suqs "Bsx", "Bx", "Db", "Dc", 56241675Suqs "Do", "Dq", "Ec", "Ef", 57241675Suqs "Em", "Eo", "Fx", "Ms", 58241675Suqs "No", "Ns", "Nx", "Ox", 59241675Suqs "Pc", "Pf", "Po", "Pq", 60241675Suqs "Qc", "Ql", "Qo", "Qq", 61241675Suqs "Re", "Rs", "Sc", "So", 62241675Suqs "Sq", "Sm", "Sx", "Sy", 63241675Suqs "Tn", "Ux", "Xc", "Xo", 64241675Suqs "Fo", "Fc", "Oo", "Oc", 65241675Suqs "Bk", "Ek", "Bt", "Hf", 66241675Suqs "Fr", "Ud", "Lb", "Lp", 67241675Suqs "Lk", "Mt", "Brq", "Bro", 68241675Suqs /* LINTED */ 69241675Suqs "Brc", "%C", "Es", "En", 70241675Suqs /* LINTED */ 71241675Suqs "Dx", "%Q", "br", "sp", 72241675Suqs /* LINTED */ 73241675Suqs "%U", "Ta" 74241675Suqs }; 75241675Suqs 76241675Suqsconst char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77241675Suqs "split", "nosplit", "ragged", 
78241675Suqs "unfilled", "literal", "file", 79241675Suqs "offset", "bullet", "dash", 80241675Suqs "hyphen", "item", "enum", 81241675Suqs "tag", "diag", "hang", 82241675Suqs "ohang", "inset", "column", 83241675Suqs "width", "compact", "std", 84241675Suqs "filled", "words", "emphasis", 85241675Suqs "symbolic", "nested", "centered" 86241675Suqs }; 87241675Suqs 88241675Suqsconst char * const *mdoc_macronames = __mdoc_macronames; 89241675Suqsconst char * const *mdoc_argnames = __mdoc_argnames; 90241675Suqs 91241675Suqsstatic void mdoc_node_free(struct mdoc_node *); 92241675Suqsstatic void mdoc_node_unlink(struct mdoc *, 93241675Suqs struct mdoc_node *); 94241675Suqsstatic void mdoc_free1(struct mdoc *); 95241675Suqsstatic void mdoc_alloc1(struct mdoc *); 96241675Suqsstatic struct mdoc_node *node_alloc(struct mdoc *, int, int, 97241675Suqs enum mdoct, enum mdoc_type); 98241675Suqsstatic int node_append(struct mdoc *, 99241675Suqs struct mdoc_node *); 100241675Suqs#if 0 101241675Suqsstatic int mdoc_preptext(struct mdoc *, int, char *, int); 102241675Suqs#endif 103241675Suqsstatic int mdoc_ptext(struct mdoc *, int, char *, int); 104241675Suqsstatic int mdoc_pmacro(struct mdoc *, int, char *, int); 105241675Suqs 106241675Suqsconst struct mdoc_node * 107261344Suqsmdoc_node(const struct mdoc *mdoc) 108241675Suqs{ 109241675Suqs 110261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 111261344Suqs return(mdoc->first); 112241675Suqs} 113241675Suqs 114241675Suqs 115241675Suqsconst struct mdoc_meta * 116261344Suqsmdoc_meta(const struct mdoc *mdoc) 117241675Suqs{ 118241675Suqs 119261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 120261344Suqs return(&mdoc->meta); 121241675Suqs} 122241675Suqs 123241675Suqs 124241675Suqs/* 125241675Suqs * Frees volatile resources (parse tree, meta-data, fields). 
126241675Suqs */ 127241675Suqsstatic void 128241675Suqsmdoc_free1(struct mdoc *mdoc) 129241675Suqs{ 130241675Suqs 131241675Suqs if (mdoc->first) 132241675Suqs mdoc_node_delete(mdoc, mdoc->first); 133241675Suqs if (mdoc->meta.title) 134241675Suqs free(mdoc->meta.title); 135241675Suqs if (mdoc->meta.os) 136241675Suqs free(mdoc->meta.os); 137241675Suqs if (mdoc->meta.name) 138241675Suqs free(mdoc->meta.name); 139241675Suqs if (mdoc->meta.arch) 140241675Suqs free(mdoc->meta.arch); 141241675Suqs if (mdoc->meta.vol) 142241675Suqs free(mdoc->meta.vol); 143241675Suqs if (mdoc->meta.msec) 144241675Suqs free(mdoc->meta.msec); 145241675Suqs if (mdoc->meta.date) 146241675Suqs free(mdoc->meta.date); 147241675Suqs} 148241675Suqs 149241675Suqs 150241675Suqs/* 151241675Suqs * Allocate all volatile resources (parse tree, meta-data, fields). 152241675Suqs */ 153241675Suqsstatic void 154241675Suqsmdoc_alloc1(struct mdoc *mdoc) 155241675Suqs{ 156241675Suqs 157241675Suqs memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 158241675Suqs mdoc->flags = 0; 159241675Suqs mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 160241675Suqs mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 161241675Suqs mdoc->first = mdoc->last; 162241675Suqs mdoc->last->type = MDOC_ROOT; 163241675Suqs mdoc->last->tok = MDOC_MAX; 164241675Suqs mdoc->next = MDOC_NEXT_CHILD; 165241675Suqs} 166241675Suqs 167241675Suqs 168241675Suqs/* 169241675Suqs * Free up volatile resources (see mdoc_free1()) then re-initialises the 170241675Suqs * data with mdoc_alloc1(). After invocation, parse data has been reset 171241675Suqs * and the parser is ready for re-invocation on a new tree; however, 172241675Suqs * cross-parse non-volatile data is kept intact. 
173241675Suqs */ 174241675Suqsvoid 175241675Suqsmdoc_reset(struct mdoc *mdoc) 176241675Suqs{ 177241675Suqs 178241675Suqs mdoc_free1(mdoc); 179241675Suqs mdoc_alloc1(mdoc); 180241675Suqs} 181241675Suqs 182241675Suqs 183241675Suqs/* 184241675Suqs * Completely free up all volatile and non-volatile parse resources. 185241675Suqs * After invocation, the pointer is no longer usable. 186241675Suqs */ 187241675Suqsvoid 188241675Suqsmdoc_free(struct mdoc *mdoc) 189241675Suqs{ 190241675Suqs 191241675Suqs mdoc_free1(mdoc); 192241675Suqs free(mdoc); 193241675Suqs} 194241675Suqs 195241675Suqs 196241675Suqs/* 197241675Suqs * Allocate volatile and non-volatile parse resources. 198241675Suqs */ 199241675Suqsstruct mdoc * 200261344Suqsmdoc_alloc(struct roff *roff, struct mparse *parse, char *defos) 201241675Suqs{ 202241675Suqs struct mdoc *p; 203241675Suqs 204241675Suqs p = mandoc_calloc(1, sizeof(struct mdoc)); 205241675Suqs 206241675Suqs p->parse = parse; 207261344Suqs p->defos = defos; 208241675Suqs p->roff = roff; 209241675Suqs 210241675Suqs mdoc_hash_init(); 211241675Suqs mdoc_alloc1(p); 212241675Suqs return(p); 213241675Suqs} 214241675Suqs 215241675Suqs 216241675Suqs/* 217241675Suqs * Climb back up the parse tree, validating open scopes. Mostly calls 218241675Suqs * through to macro_end() in macro.c. 219241675Suqs */ 220241675Suqsint 221261344Suqsmdoc_endparse(struct mdoc *mdoc) 222241675Suqs{ 223241675Suqs 224261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 225261344Suqs if (mdoc_macroend(mdoc)) 226241675Suqs return(1); 227261344Suqs mdoc->flags |= MDOC_HALT; 228241675Suqs return(0); 229241675Suqs} 230241675Suqs 231241675Suqsint 232261344Suqsmdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep) 233241675Suqs{ 234241675Suqs struct mdoc_node *n; 235241675Suqs 236261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 237241675Suqs 238241675Suqs /* No text before an initial macro. 
*/ 239241675Suqs 240261344Suqs if (SEC_NONE == mdoc->lastnamed) { 241261344Suqs mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT); 242241675Suqs return(1); 243241675Suqs } 244241675Suqs 245261344Suqs n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 246241675Suqs n->eqn = ep; 247241675Suqs 248261344Suqs if ( ! node_append(mdoc, n)) 249241675Suqs return(0); 250241675Suqs 251261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 252241675Suqs return(1); 253241675Suqs} 254241675Suqs 255241675Suqsint 256261344Suqsmdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp) 257241675Suqs{ 258241675Suqs struct mdoc_node *n; 259241675Suqs 260261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 261241675Suqs 262241675Suqs /* No text before an initial macro. */ 263241675Suqs 264261344Suqs if (SEC_NONE == mdoc->lastnamed) { 265261344Suqs mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT); 266241675Suqs return(1); 267241675Suqs } 268241675Suqs 269261344Suqs n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL); 270241675Suqs n->span = sp; 271241675Suqs 272261344Suqs if ( ! node_append(mdoc, n)) 273241675Suqs return(0); 274241675Suqs 275261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 276241675Suqs return(1); 277241675Suqs} 278241675Suqs 279241675Suqs 280241675Suqs/* 281241675Suqs * Main parse routine. Parses a single line -- really just hands off to 282241675Suqs * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 283241675Suqs */ 284241675Suqsint 285261344Suqsmdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs) 286241675Suqs{ 287241675Suqs 288261344Suqs assert( ! (MDOC_HALT & mdoc->flags)); 289241675Suqs 290261344Suqs mdoc->flags |= MDOC_NEWLINE; 291241675Suqs 292241675Suqs /* 293241675Suqs * Let the roff nS register switch SYNOPSIS mode early, 294241675Suqs * such that the parser knows at all times 295241675Suqs * whether this mode is on or off. 296241675Suqs * Note that this mode is also switched by the Sh macro. 
297241675Suqs */ 298261344Suqs if (roff_getreg(mdoc->roff, "nS")) 299261344Suqs mdoc->flags |= MDOC_SYNOPSIS; 300261344Suqs else 301261344Suqs mdoc->flags &= ~MDOC_SYNOPSIS; 302241675Suqs 303261344Suqs return(roff_getcontrol(mdoc->roff, buf, &offs) ? 304261344Suqs mdoc_pmacro(mdoc, ln, buf, offs) : 305261344Suqs mdoc_ptext(mdoc, ln, buf, offs)); 306241675Suqs} 307241675Suqs 308241675Suqsint 309241675Suqsmdoc_macro(MACRO_PROT_ARGS) 310241675Suqs{ 311241675Suqs assert(tok < MDOC_MAX); 312241675Suqs 313241675Suqs /* If we're in the body, deny prologue calls. */ 314241675Suqs 315241675Suqs if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 316261344Suqs MDOC_PBODY & mdoc->flags) { 317261344Suqs mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY); 318241675Suqs return(1); 319241675Suqs } 320241675Suqs 321241675Suqs /* If we're in the prologue, deny "body" macros. */ 322241675Suqs 323241675Suqs if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 324261344Suqs ! (MDOC_PBODY & mdoc->flags)) { 325261344Suqs mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG); 326261344Suqs if (NULL == mdoc->meta.msec) 327261344Suqs mdoc->meta.msec = mandoc_strdup("1"); 328261344Suqs if (NULL == mdoc->meta.title) 329261344Suqs mdoc->meta.title = mandoc_strdup("UNKNOWN"); 330261344Suqs if (NULL == mdoc->meta.vol) 331261344Suqs mdoc->meta.vol = mandoc_strdup("LOCAL"); 332261344Suqs if (NULL == mdoc->meta.os) 333261344Suqs mdoc->meta.os = mandoc_strdup("LOCAL"); 334261344Suqs if (NULL == mdoc->meta.date) 335261344Suqs mdoc->meta.date = mandoc_normdate 336261344Suqs (mdoc->parse, NULL, line, ppos); 337261344Suqs mdoc->flags |= MDOC_PBODY; 338241675Suqs } 339241675Suqs 340261344Suqs return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf)); 341241675Suqs} 342241675Suqs 343241675Suqs 344241675Suqsstatic int 345241675Suqsnode_append(struct mdoc *mdoc, struct mdoc_node *p) 346241675Suqs{ 347241675Suqs 348241675Suqs assert(mdoc->last); 349241675Suqs assert(mdoc->first); 350241675Suqs assert(MDOC_ROOT != 
p->type); 351241675Suqs 352241675Suqs switch (mdoc->next) { 353241675Suqs case (MDOC_NEXT_SIBLING): 354241675Suqs mdoc->last->next = p; 355241675Suqs p->prev = mdoc->last; 356241675Suqs p->parent = mdoc->last->parent; 357241675Suqs break; 358241675Suqs case (MDOC_NEXT_CHILD): 359241675Suqs mdoc->last->child = p; 360241675Suqs p->parent = mdoc->last; 361241675Suqs break; 362241675Suqs default: 363241675Suqs abort(); 364241675Suqs /* NOTREACHED */ 365241675Suqs } 366241675Suqs 367241675Suqs p->parent->nchild++; 368241675Suqs 369241675Suqs /* 370241675Suqs * Copy over the normalised-data pointer of our parent. Not 371241675Suqs * everybody has one, but copying a null pointer is fine. 372241675Suqs */ 373241675Suqs 374241675Suqs switch (p->type) { 375241675Suqs case (MDOC_BODY): 376261344Suqs if (ENDBODY_NOT != p->end) 377261344Suqs break; 378241675Suqs /* FALLTHROUGH */ 379241675Suqs case (MDOC_TAIL): 380241675Suqs /* FALLTHROUGH */ 381241675Suqs case (MDOC_HEAD): 382241675Suqs p->norm = p->parent->norm; 383241675Suqs break; 384241675Suqs default: 385241675Suqs break; 386241675Suqs } 387241675Suqs 388241675Suqs if ( ! mdoc_valid_pre(mdoc, p)) 389241675Suqs return(0); 390241675Suqs 391241675Suqs switch (p->type) { 392241675Suqs case (MDOC_HEAD): 393241675Suqs assert(MDOC_BLOCK == p->parent->type); 394241675Suqs p->parent->head = p; 395241675Suqs break; 396241675Suqs case (MDOC_TAIL): 397241675Suqs assert(MDOC_BLOCK == p->parent->type); 398241675Suqs p->parent->tail = p; 399241675Suqs break; 400241675Suqs case (MDOC_BODY): 401241675Suqs if (p->end) 402241675Suqs break; 403241675Suqs assert(MDOC_BLOCK == p->parent->type); 404241675Suqs p->parent->body = p; 405241675Suqs break; 406241675Suqs default: 407241675Suqs break; 408241675Suqs } 409241675Suqs 410241675Suqs mdoc->last = p; 411241675Suqs 412241675Suqs switch (p->type) { 413241675Suqs case (MDOC_TBL): 414241675Suqs /* FALLTHROUGH */ 415241675Suqs case (MDOC_TEXT): 416241675Suqs if ( ! 
mdoc_valid_post(mdoc)) 417241675Suqs return(0); 418241675Suqs break; 419241675Suqs default: 420241675Suqs break; 421241675Suqs } 422241675Suqs 423241675Suqs return(1); 424241675Suqs} 425241675Suqs 426241675Suqs 427241675Suqsstatic struct mdoc_node * 428261344Suqsnode_alloc(struct mdoc *mdoc, int line, int pos, 429241675Suqs enum mdoct tok, enum mdoc_type type) 430241675Suqs{ 431241675Suqs struct mdoc_node *p; 432241675Suqs 433241675Suqs p = mandoc_calloc(1, sizeof(struct mdoc_node)); 434261344Suqs p->sec = mdoc->lastsec; 435241675Suqs p->line = line; 436241675Suqs p->pos = pos; 437261344Suqs p->lastline = line; 438241675Suqs p->tok = tok; 439241675Suqs p->type = type; 440241675Suqs 441241675Suqs /* Flag analysis. */ 442241675Suqs 443261344Suqs if (MDOC_SYNOPSIS & mdoc->flags) 444241675Suqs p->flags |= MDOC_SYNPRETTY; 445241675Suqs else 446241675Suqs p->flags &= ~MDOC_SYNPRETTY; 447261344Suqs if (MDOC_NEWLINE & mdoc->flags) 448241675Suqs p->flags |= MDOC_LINE; 449261344Suqs mdoc->flags &= ~MDOC_NEWLINE; 450241675Suqs 451241675Suqs return(p); 452241675Suqs} 453241675Suqs 454241675Suqs 455241675Suqsint 456261344Suqsmdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 457241675Suqs{ 458241675Suqs struct mdoc_node *p; 459241675Suqs 460261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL); 461261344Suqs if ( ! node_append(mdoc, p)) 462241675Suqs return(0); 463261344Suqs mdoc->next = MDOC_NEXT_CHILD; 464241675Suqs return(1); 465241675Suqs} 466241675Suqs 467241675Suqs 468241675Suqsint 469261344Suqsmdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 470241675Suqs{ 471241675Suqs struct mdoc_node *p; 472241675Suqs 473261344Suqs assert(mdoc->first); 474261344Suqs assert(mdoc->last); 475241675Suqs 476261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD); 477261344Suqs if ( ! 
node_append(mdoc, p)) 478241675Suqs return(0); 479261344Suqs mdoc->next = MDOC_NEXT_CHILD; 480241675Suqs return(1); 481241675Suqs} 482241675Suqs 483241675Suqs 484241675Suqsint 485261344Suqsmdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok) 486241675Suqs{ 487241675Suqs struct mdoc_node *p; 488241675Suqs 489261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 490261344Suqs if ( ! node_append(mdoc, p)) 491241675Suqs return(0); 492261344Suqs mdoc->next = MDOC_NEXT_CHILD; 493241675Suqs return(1); 494241675Suqs} 495241675Suqs 496241675Suqs 497241675Suqsint 498261344Suqsmdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok, 499241675Suqs struct mdoc_node *body, enum mdoc_endbody end) 500241675Suqs{ 501241675Suqs struct mdoc_node *p; 502241675Suqs 503261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_BODY); 504241675Suqs p->pending = body; 505261344Suqs p->norm = body->norm; 506241675Suqs p->end = end; 507261344Suqs if ( ! node_append(mdoc, p)) 508241675Suqs return(0); 509261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 510241675Suqs return(1); 511241675Suqs} 512241675Suqs 513241675Suqs 514241675Suqsint 515261344Suqsmdoc_block_alloc(struct mdoc *mdoc, int line, int pos, 516241675Suqs enum mdoct tok, struct mdoc_arg *args) 517241675Suqs{ 518241675Suqs struct mdoc_node *p; 519241675Suqs 520261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK); 521241675Suqs p->args = args; 522241675Suqs if (p->args) 523241675Suqs (args->refcnt)++; 524241675Suqs 525241675Suqs switch (tok) { 526241675Suqs case (MDOC_Bd): 527241675Suqs /* FALLTHROUGH */ 528241675Suqs case (MDOC_Bf): 529241675Suqs /* FALLTHROUGH */ 530241675Suqs case (MDOC_Bl): 531241675Suqs /* FALLTHROUGH */ 532241675Suqs case (MDOC_Rs): 533241675Suqs p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 534241675Suqs break; 535241675Suqs default: 536241675Suqs break; 537241675Suqs } 538241675Suqs 539261344Suqs if ( ! 
node_append(mdoc, p)) 540241675Suqs return(0); 541261344Suqs mdoc->next = MDOC_NEXT_CHILD; 542241675Suqs return(1); 543241675Suqs} 544241675Suqs 545241675Suqs 546241675Suqsint 547261344Suqsmdoc_elem_alloc(struct mdoc *mdoc, int line, int pos, 548241675Suqs enum mdoct tok, struct mdoc_arg *args) 549241675Suqs{ 550241675Suqs struct mdoc_node *p; 551241675Suqs 552261344Suqs p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM); 553241675Suqs p->args = args; 554241675Suqs if (p->args) 555241675Suqs (args->refcnt)++; 556241675Suqs 557241675Suqs switch (tok) { 558241675Suqs case (MDOC_An): 559241675Suqs p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 560241675Suqs break; 561241675Suqs default: 562241675Suqs break; 563241675Suqs } 564241675Suqs 565261344Suqs if ( ! node_append(mdoc, p)) 566241675Suqs return(0); 567261344Suqs mdoc->next = MDOC_NEXT_CHILD; 568241675Suqs return(1); 569241675Suqs} 570241675Suqs 571241675Suqsint 572261344Suqsmdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p) 573241675Suqs{ 574241675Suqs struct mdoc_node *n; 575241675Suqs 576261344Suqs n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT); 577261344Suqs n->string = roff_strdup(mdoc->roff, p); 578241675Suqs 579261344Suqs if ( ! 
node_append(mdoc, n)) 580241675Suqs return(0); 581241675Suqs 582261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 583241675Suqs return(1); 584241675Suqs} 585241675Suqs 586261344Suqsvoid 587261344Suqsmdoc_word_append(struct mdoc *mdoc, const char *p) 588261344Suqs{ 589261344Suqs struct mdoc_node *n; 590261344Suqs char *addstr, *newstr; 591241675Suqs 592261344Suqs n = mdoc->last; 593261344Suqs addstr = roff_strdup(mdoc->roff, p); 594261344Suqs if (-1 == asprintf(&newstr, "%s %s", n->string, addstr)) { 595261344Suqs perror(NULL); 596261344Suqs exit((int)MANDOCLEVEL_SYSERR); 597261344Suqs } 598261344Suqs free(addstr); 599261344Suqs free(n->string); 600261344Suqs n->string = newstr; 601261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 602261344Suqs} 603261344Suqs 604241675Suqsstatic void 605241675Suqsmdoc_node_free(struct mdoc_node *p) 606241675Suqs{ 607241675Suqs 608241675Suqs if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 609241675Suqs free(p->norm); 610241675Suqs if (p->string) 611241675Suqs free(p->string); 612241675Suqs if (p->args) 613241675Suqs mdoc_argv_free(p->args); 614241675Suqs free(p); 615241675Suqs} 616241675Suqs 617241675Suqs 618241675Suqsstatic void 619261344Suqsmdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n) 620241675Suqs{ 621241675Suqs 622241675Suqs /* Adjust siblings. */ 623241675Suqs 624241675Suqs if (n->prev) 625241675Suqs n->prev->next = n->next; 626241675Suqs if (n->next) 627241675Suqs n->next->prev = n->prev; 628241675Suqs 629241675Suqs /* Adjust parent. */ 630241675Suqs 631241675Suqs if (n->parent) { 632241675Suqs n->parent->nchild--; 633241675Suqs if (n->parent->child == n) 634241675Suqs n->parent->child = n->prev ? n->prev : n->next; 635241675Suqs if (n->parent->last == n) 636241675Suqs n->parent->last = n->prev ? n->prev : NULL; 637241675Suqs } 638241675Suqs 639241675Suqs /* Adjust parse point, if applicable. 
*/ 640241675Suqs 641261344Suqs if (mdoc && mdoc->last == n) { 642241675Suqs if (n->prev) { 643261344Suqs mdoc->last = n->prev; 644261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 645241675Suqs } else { 646261344Suqs mdoc->last = n->parent; 647261344Suqs mdoc->next = MDOC_NEXT_CHILD; 648241675Suqs } 649241675Suqs } 650241675Suqs 651261344Suqs if (mdoc && mdoc->first == n) 652261344Suqs mdoc->first = NULL; 653241675Suqs} 654241675Suqs 655241675Suqs 656241675Suqsvoid 657261344Suqsmdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p) 658241675Suqs{ 659241675Suqs 660241675Suqs while (p->child) { 661241675Suqs assert(p->nchild); 662261344Suqs mdoc_node_delete(mdoc, p->child); 663241675Suqs } 664241675Suqs assert(0 == p->nchild); 665241675Suqs 666261344Suqs mdoc_node_unlink(mdoc, p); 667241675Suqs mdoc_node_free(p); 668241675Suqs} 669241675Suqs 670261344Suqsint 671261344Suqsmdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p) 672261344Suqs{ 673261344Suqs 674261344Suqs mdoc_node_unlink(mdoc, p); 675261344Suqs return(node_append(mdoc, p)); 676261344Suqs} 677261344Suqs 678241675Suqs#if 0 679241675Suqs/* 680241675Suqs * Pre-treat a text line. 681241675Suqs * Text lines can consist of equations, which must be handled apart from 682241675Suqs * the regular text. 683241675Suqs * Thus, use this function to step through a line checking if it has any 684241675Suqs * equations embedded in it. 685241675Suqs * This must handle multiple equations AND equations that do not end at 686241675Suqs * the end-of-line, i.e., will re-enter in the next roff parse. 687241675Suqs */ 688241675Suqsstatic int 689261344Suqsmdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs) 690241675Suqs{ 691241675Suqs char *start, *end; 692241675Suqs char delim; 693241675Suqs 694241675Suqs while ('\0' != buf[offs]) { 695241675Suqs /* Mark starting position if eqn is set. 
*/ 696241675Suqs start = NULL; 697261344Suqs if ('\0' != (delim = roff_eqndelim(mdoc->roff))) 698241675Suqs if (NULL != (start = strchr(buf + offs, delim))) 699241675Suqs *start++ = '\0'; 700241675Suqs 701241675Suqs /* Parse text as normal. */ 702261344Suqs if ( ! mdoc_ptext(mdoc, line, buf, offs)) 703241675Suqs return(0); 704241675Suqs 705241675Suqs /* Continue only if an equation exists. */ 706241675Suqs if (NULL == start) 707241675Suqs break; 708241675Suqs 709241675Suqs /* Read past the end of the equation. */ 710241675Suqs offs += start - (buf + offs); 711241675Suqs assert(start == &buf[offs]); 712241675Suqs if (NULL != (end = strchr(buf + offs, delim))) { 713241675Suqs *end++ = '\0'; 714241675Suqs while (' ' == *end) 715241675Suqs end++; 716241675Suqs } 717241675Suqs 718241675Suqs /* Parse the equation itself. */ 719261344Suqs roff_openeqn(mdoc->roff, NULL, line, offs, buf); 720241675Suqs 721241675Suqs /* Process a finished equation? */ 722261344Suqs if (roff_closeeqn(mdoc->roff)) 723261344Suqs if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff))) 724241675Suqs return(0); 725241675Suqs offs += (end - (buf + offs)); 726241675Suqs } 727241675Suqs 728241675Suqs return(1); 729241675Suqs} 730241675Suqs#endif 731241675Suqs 732241675Suqs/* 733241675Suqs * Parse free-form text, that is, a line that does not begin with the 734241675Suqs * control character. 735241675Suqs */ 736241675Suqsstatic int 737261344Suqsmdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs) 738241675Suqs{ 739241675Suqs char *c, *ws, *end; 740241675Suqs struct mdoc_node *n; 741241675Suqs 742241675Suqs /* No text before an initial macro. 
*/ 743241675Suqs 744261344Suqs if (SEC_NONE == mdoc->lastnamed) { 745261344Suqs mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT); 746241675Suqs return(1); 747241675Suqs } 748241675Suqs 749261344Suqs assert(mdoc->last); 750261344Suqs n = mdoc->last; 751241675Suqs 752241675Suqs /* 753241675Suqs * Divert directly to list processing if we're encountering a 754241675Suqs * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 755241675Suqs * (a MDOC_BODY means it's already open, in which case we should 756241675Suqs * process within its context in the normal way). 757241675Suqs */ 758241675Suqs 759241675Suqs if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 760241675Suqs LIST_column == n->norm->Bl.type) { 761241675Suqs /* `Bl' is open without any children. */ 762261344Suqs mdoc->flags |= MDOC_FREECOL; 763261344Suqs return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 764241675Suqs } 765241675Suqs 766241675Suqs if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 767241675Suqs NULL != n->parent && 768241675Suqs MDOC_Bl == n->parent->tok && 769241675Suqs LIST_column == n->parent->norm->Bl.type) { 770241675Suqs /* `Bl' has block-level `It' children. */ 771261344Suqs mdoc->flags |= MDOC_FREECOL; 772261344Suqs return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf)); 773241675Suqs } 774241675Suqs 775241675Suqs /* 776241675Suqs * Search for the beginning of unescaped trailing whitespace (ws) 777241675Suqs * and for the first character not to be output (end). 778241675Suqs */ 779241675Suqs 780241675Suqs /* FIXME: replace with strcspn(). */ 781241675Suqs ws = NULL; 782241675Suqs for (c = end = buf + offs; *c; c++) { 783241675Suqs switch (*c) { 784241675Suqs case ' ': 785241675Suqs if (NULL == ws) 786241675Suqs ws = c; 787241675Suqs continue; 788241675Suqs case '\t': 789241675Suqs /* 790241675Suqs * Always warn about trailing tabs, 791241675Suqs * even outside literal context, 792241675Suqs * where they should be put on the next line. 
793241675Suqs */ 794241675Suqs if (NULL == ws) 795241675Suqs ws = c; 796241675Suqs /* 797241675Suqs * Strip trailing tabs in literal context only; 798241675Suqs * outside, they affect the next line. 799241675Suqs */ 800261344Suqs if (MDOC_LITERAL & mdoc->flags) 801241675Suqs continue; 802241675Suqs break; 803241675Suqs case '\\': 804241675Suqs /* Skip the escaped character, too, if any. */ 805241675Suqs if (c[1]) 806241675Suqs c++; 807241675Suqs /* FALLTHROUGH */ 808241675Suqs default: 809241675Suqs ws = NULL; 810241675Suqs break; 811241675Suqs } 812241675Suqs end = c + 1; 813241675Suqs } 814241675Suqs *end = '\0'; 815241675Suqs 816241675Suqs if (ws) 817261344Suqs mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 818241675Suqs 819261344Suqs if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) { 820261344Suqs mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 821241675Suqs 822241675Suqs /* 823241675Suqs * Insert a `sp' in the case of a blank line. Technically, 824241675Suqs * blank lines aren't allowed, but enough manuals assume this 825241675Suqs * behaviour that we want to work around it. 826241675Suqs */ 827261344Suqs if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL)) 828241675Suqs return(0); 829241675Suqs 830261344Suqs mdoc->next = MDOC_NEXT_SIBLING; 831261344Suqs 832261344Suqs return(mdoc_valid_post(mdoc)); 833241675Suqs } 834241675Suqs 835261344Suqs if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs)) 836241675Suqs return(0); 837241675Suqs 838261344Suqs if (MDOC_LITERAL & mdoc->flags) 839241675Suqs return(1); 840241675Suqs 841241675Suqs /* 842241675Suqs * End-of-sentence check. If the last character is an unescaped 843241675Suqs * EOS character, then flag the node as being the end of a 844241675Suqs * sentence. The front-end will know how to interpret this. 
845241675Suqs */ 846241675Suqs 847241675Suqs assert(buf < end); 848241675Suqs 849241675Suqs if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 850261344Suqs mdoc->last->flags |= MDOC_EOS; 851241675Suqs 852241675Suqs return(1); 853241675Suqs} 854241675Suqs 855241675Suqs 856241675Suqs/* 857241675Suqs * Parse a macro line, that is, a line beginning with the control 858241675Suqs * character. 859241675Suqs */ 860241675Suqsstatic int 861261344Suqsmdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs) 862241675Suqs{ 863241675Suqs enum mdoct tok; 864241675Suqs int i, sv; 865241675Suqs char mac[5]; 866241675Suqs struct mdoc_node *n; 867241675Suqs 868241675Suqs /* Empty post-control lines are ignored. */ 869241675Suqs 870241675Suqs if ('"' == buf[offs]) { 871261344Suqs mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT); 872241675Suqs return(1); 873241675Suqs } else if ('\0' == buf[offs]) 874241675Suqs return(1); 875241675Suqs 876241675Suqs sv = offs; 877241675Suqs 878241675Suqs /* 879241675Suqs * Copy the first word into a nil-terminated buffer. 880241675Suqs * Stop copying when a tab, space, or eoln is encountered. 881241675Suqs */ 882241675Suqs 883241675Suqs i = 0; 884241675Suqs while (i < 4 && '\0' != buf[offs] && 885241675Suqs ' ' != buf[offs] && '\t' != buf[offs]) 886241675Suqs mac[i++] = buf[offs++]; 887241675Suqs 888241675Suqs mac[i] = '\0'; 889241675Suqs 890241675Suqs tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 891241675Suqs 892241675Suqs if (MDOC_MAX == tok) { 893261344Suqs mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse, 894241675Suqs ln, sv, "%s", buf + sv - 1); 895241675Suqs return(1); 896241675Suqs } 897241675Suqs 898241675Suqs /* Disregard the first trailing tab, if applicable. */ 899241675Suqs 900241675Suqs if ('\t' == buf[offs]) 901241675Suqs offs++; 902241675Suqs 903241675Suqs /* Jump to the next non-whitespace word. 
*/ 904241675Suqs 905241675Suqs while (buf[offs] && ' ' == buf[offs]) 906241675Suqs offs++; 907241675Suqs 908241675Suqs /* 909241675Suqs * Trailing whitespace. Note that tabs are allowed to be passed 910241675Suqs * into the parser as "text", so we only warn about spaces here. 911241675Suqs */ 912241675Suqs 913241675Suqs if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 914261344Suqs mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE); 915241675Suqs 916241675Suqs /* 917241675Suqs * If an initial macro or a list invocation, divert directly 918241675Suqs * into macro processing. 919241675Suqs */ 920241675Suqs 921261344Suqs if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 922261344Suqs if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 923241675Suqs goto err; 924241675Suqs return(1); 925241675Suqs } 926241675Suqs 927261344Suqs n = mdoc->last; 928261344Suqs assert(mdoc->last); 929241675Suqs 930241675Suqs /* 931241675Suqs * If the first macro of a `Bl -column', open an `It' block 932241675Suqs * context around the parsed macro. 933241675Suqs */ 934241675Suqs 935241675Suqs if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 936241675Suqs LIST_column == n->norm->Bl.type) { 937261344Suqs mdoc->flags |= MDOC_FREECOL; 938261344Suqs if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 939241675Suqs goto err; 940241675Suqs return(1); 941241675Suqs } 942241675Suqs 943241675Suqs /* 944241675Suqs * If we're following a block-level `It' within a `Bl -column' 945241675Suqs * context (perhaps opened in the above block or in ptext()), 946241675Suqs * then open an `It' block context around the parsed macro. 947241675Suqs */ 948241675Suqs 949241675Suqs if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 950241675Suqs NULL != n->parent && 951241675Suqs MDOC_Bl == n->parent->tok && 952241675Suqs LIST_column == n->parent->norm->Bl.type) { 953261344Suqs mdoc->flags |= MDOC_FREECOL; 954261344Suqs if ( ! 
mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf)) 955241675Suqs goto err; 956241675Suqs return(1); 957241675Suqs } 958241675Suqs 959241675Suqs /* Normal processing of a macro. */ 960241675Suqs 961261344Suqs if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf)) 962241675Suqs goto err; 963241675Suqs 964241675Suqs return(1); 965241675Suqs 966241675Suqserr: /* Error out. */ 967241675Suqs 968261344Suqs mdoc->flags |= MDOC_HALT; 969241675Suqs return(0); 970241675Suqs} 971241675Suqs 972241675Suqsenum mdelim 973241675Suqsmdoc_isdelim(const char *p) 974241675Suqs{ 975241675Suqs 976241675Suqs if ('\0' == p[0]) 977241675Suqs return(DELIM_NONE); 978241675Suqs 979241675Suqs if ('\0' == p[1]) 980241675Suqs switch (p[0]) { 981241675Suqs case('('): 982241675Suqs /* FALLTHROUGH */ 983241675Suqs case('['): 984241675Suqs return(DELIM_OPEN); 985241675Suqs case('|'): 986241675Suqs return(DELIM_MIDDLE); 987241675Suqs case('.'): 988241675Suqs /* FALLTHROUGH */ 989241675Suqs case(','): 990241675Suqs /* FALLTHROUGH */ 991241675Suqs case(';'): 992241675Suqs /* FALLTHROUGH */ 993241675Suqs case(':'): 994241675Suqs /* FALLTHROUGH */ 995241675Suqs case('?'): 996241675Suqs /* FALLTHROUGH */ 997241675Suqs case('!'): 998241675Suqs /* FALLTHROUGH */ 999241675Suqs case(')'): 1000241675Suqs /* FALLTHROUGH */ 1001241675Suqs case(']'): 1002241675Suqs return(DELIM_CLOSE); 1003241675Suqs default: 1004241675Suqs return(DELIM_NONE); 1005241675Suqs } 1006241675Suqs 1007241675Suqs if ('\\' != p[0]) 1008241675Suqs return(DELIM_NONE); 1009241675Suqs 1010241675Suqs if (0 == strcmp(p + 1, ".")) 1011241675Suqs return(DELIM_CLOSE); 1012261344Suqs if (0 == strcmp(p + 1, "fR|\\fP")) 1013241675Suqs return(DELIM_MIDDLE); 1014241675Suqs 1015241675Suqs return(DELIM_NONE); 1016241675Suqs} 1017