1241675Suqs/* $Id: mdoc.c,v 1.196 2011/09/30 00:13:28 schwarze Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4241675Suqs * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17241675Suqs */ 18241675Suqs#ifdef HAVE_CONFIG_H 19241675Suqs#include "config.h" 20241675Suqs#endif 21241675Suqs 22241675Suqs#include <sys/types.h> 23241675Suqs 24241675Suqs#include <assert.h> 25241675Suqs#include <stdarg.h> 26241675Suqs#include <stdio.h> 27241675Suqs#include <stdlib.h> 28241675Suqs#include <string.h> 29241675Suqs#include <time.h> 30241675Suqs 31241675Suqs#include "mdoc.h" 32241675Suqs#include "mandoc.h" 33241675Suqs#include "libmdoc.h" 34241675Suqs#include "libmandoc.h" 35241675Suqs 36241675Suqsconst char *const __mdoc_macronames[MDOC_MAX] = { 37241675Suqs "Ap", "Dd", "Dt", "Os", 38241675Suqs "Sh", "Ss", "Pp", "D1", 39241675Suqs "Dl", "Bd", "Ed", "Bl", 40241675Suqs "El", "It", "Ad", "An", 41241675Suqs "Ar", "Cd", "Cm", "Dv", 42241675Suqs "Er", "Ev", "Ex", "Fa", 43241675Suqs "Fd", "Fl", "Fn", "Ft", 44241675Suqs "Ic", "In", "Li", "Nd", 45241675Suqs "Nm", "Op", "Ot", "Pa", 46241675Suqs "Rv", "St", "Va", "Vt", 47241675Suqs /* LINTED */ 48241675Suqs "Xr", "%A", "%B", "%D", 49241675Suqs /* LINTED */ 50241675Suqs "%I", "%J", "%N", "%O", 51241675Suqs /* LINTED */ 52241675Suqs "%P", "%R", "%T", "%V", 53241675Suqs "Ac", "Ao", "Aq", "At", 54241675Suqs "Bc", "Bf", "Bo", "Bq", 55241675Suqs "Bsx", "Bx", "Db", "Dc", 56241675Suqs "Do", "Dq", "Ec", "Ef", 57241675Suqs "Em", "Eo", "Fx", "Ms", 58241675Suqs "No", "Ns", "Nx", "Ox", 59241675Suqs "Pc", "Pf", "Po", "Pq", 60241675Suqs "Qc", "Ql", "Qo", "Qq", 61241675Suqs "Re", "Rs", "Sc", "So", 62241675Suqs "Sq", "Sm", "Sx", "Sy", 63241675Suqs "Tn", "Ux", "Xc", "Xo", 64241675Suqs "Fo", "Fc", "Oo", "Oc", 65241675Suqs "Bk", "Ek", "Bt", "Hf", 66241675Suqs "Fr", "Ud", "Lb", "Lp", 67241675Suqs "Lk", "Mt", "Brq", "Bro", 68241675Suqs /* LINTED */ 69241675Suqs "Brc", "%C", "Es", "En", 70241675Suqs /* LINTED */ 71241675Suqs "Dx", "%Q", "br", "sp", 72241675Suqs /* LINTED */ 73241675Suqs "%U", "Ta" 74241675Suqs }; 75241675Suqs 76241675Suqsconst char *const __mdoc_argnames[MDOC_ARG_MAX] = { 77241675Suqs "split", "nosplit", "ragged", 78241675Suqs "unfilled", "literal", "file", 79241675Suqs "offset", "bullet", "dash", 80241675Suqs "hyphen", "item", "enum", 81241675Suqs "tag", "diag", "hang", 82241675Suqs "ohang", "inset", "column", 83241675Suqs "width", "compact", "std", 84241675Suqs "filled", "words", "emphasis", 85241675Suqs "symbolic", "nested", "centered" 86241675Suqs }; 87241675Suqs 88241675Suqsconst char * const *mdoc_macronames = __mdoc_macronames; 89241675Suqsconst char * const *mdoc_argnames = __mdoc_argnames; 90241675Suqs 91241675Suqsstatic void mdoc_node_free(struct mdoc_node *); 92241675Suqsstatic void mdoc_node_unlink(struct mdoc *, 93241675Suqs struct mdoc_node *); 94241675Suqsstatic void mdoc_free1(struct mdoc *); 95241675Suqsstatic void mdoc_alloc1(struct mdoc *); 96241675Suqsstatic struct mdoc_node *node_alloc(struct mdoc *, int, int, 97241675Suqs enum mdoct, enum mdoc_type); 98241675Suqsstatic int node_append(struct mdoc *, 99241675Suqs struct mdoc_node *); 100241675Suqs#if 0 101241675Suqsstatic int mdoc_preptext(struct mdoc *, int, char *, int); 102241675Suqs#endif 103241675Suqsstatic int mdoc_ptext(struct mdoc *, int, char *, int); 104241675Suqsstatic int mdoc_pmacro(struct mdoc *, int, char *, int); 105241675Suqs 106241675Suqsconst struct mdoc_node * 107241675Suqsmdoc_node(const struct mdoc *m) 108241675Suqs{ 109241675Suqs 110241675Suqs assert( ! (MDOC_HALT & m->flags)); 111241675Suqs return(m->first); 112241675Suqs} 113241675Suqs 114241675Suqs 115241675Suqsconst struct mdoc_meta * 116241675Suqsmdoc_meta(const struct mdoc *m) 117241675Suqs{ 118241675Suqs 119241675Suqs assert( ! (MDOC_HALT & m->flags)); 120241675Suqs return(&m->meta); 121241675Suqs} 122241675Suqs 123241675Suqs 124241675Suqs/* 125241675Suqs * Frees volatile resources (parse tree, meta-data, fields). 126241675Suqs */ 127241675Suqsstatic void 128241675Suqsmdoc_free1(struct mdoc *mdoc) 129241675Suqs{ 130241675Suqs 131241675Suqs if (mdoc->first) 132241675Suqs mdoc_node_delete(mdoc, mdoc->first); 133241675Suqs if (mdoc->meta.title) 134241675Suqs free(mdoc->meta.title); 135241675Suqs if (mdoc->meta.os) 136241675Suqs free(mdoc->meta.os); 137241675Suqs if (mdoc->meta.name) 138241675Suqs free(mdoc->meta.name); 139241675Suqs if (mdoc->meta.arch) 140241675Suqs free(mdoc->meta.arch); 141241675Suqs if (mdoc->meta.vol) 142241675Suqs free(mdoc->meta.vol); 143241675Suqs if (mdoc->meta.msec) 144241675Suqs free(mdoc->meta.msec); 145241675Suqs if (mdoc->meta.date) 146241675Suqs free(mdoc->meta.date); 147241675Suqs} 148241675Suqs 149241675Suqs 150241675Suqs/* 151241675Suqs * Allocate all volatile resources (parse tree, meta-data, fields). 152241675Suqs */ 153241675Suqsstatic void 154241675Suqsmdoc_alloc1(struct mdoc *mdoc) 155241675Suqs{ 156241675Suqs 157241675Suqs memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); 158241675Suqs mdoc->flags = 0; 159241675Suqs mdoc->lastnamed = mdoc->lastsec = SEC_NONE; 160241675Suqs mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); 161241675Suqs mdoc->first = mdoc->last; 162241675Suqs mdoc->last->type = MDOC_ROOT; 163241675Suqs mdoc->last->tok = MDOC_MAX; 164241675Suqs mdoc->next = MDOC_NEXT_CHILD; 165241675Suqs} 166241675Suqs 167241675Suqs 168241675Suqs/* 169241675Suqs * Free up volatile resources (see mdoc_free1()) then re-initialises the 170241675Suqs * data with mdoc_alloc1(). After invocation, parse data has been reset 171241675Suqs * and the parser is ready for re-invocation on a new tree; however, 172241675Suqs * cross-parse non-volatile data is kept intact. 173241675Suqs */ 174241675Suqsvoid 175241675Suqsmdoc_reset(struct mdoc *mdoc) 176241675Suqs{ 177241675Suqs 178241675Suqs mdoc_free1(mdoc); 179241675Suqs mdoc_alloc1(mdoc); 180241675Suqs} 181241675Suqs 182241675Suqs 183241675Suqs/* 184241675Suqs * Completely free up all volatile and non-volatile parse resources. 185241675Suqs * After invocation, the pointer is no longer usable. 186241675Suqs */ 187241675Suqsvoid 188241675Suqsmdoc_free(struct mdoc *mdoc) 189241675Suqs{ 190241675Suqs 191241675Suqs mdoc_free1(mdoc); 192241675Suqs free(mdoc); 193241675Suqs} 194241675Suqs 195241675Suqs 196241675Suqs/* 197241675Suqs * Allocate volatile and non-volatile parse resources. 198241675Suqs */ 199241675Suqsstruct mdoc * 200241675Suqsmdoc_alloc(struct roff *roff, struct mparse *parse) 201241675Suqs{ 202241675Suqs struct mdoc *p; 203241675Suqs 204241675Suqs p = mandoc_calloc(1, sizeof(struct mdoc)); 205241675Suqs 206241675Suqs p->parse = parse; 207241675Suqs p->roff = roff; 208241675Suqs 209241675Suqs mdoc_hash_init(); 210241675Suqs mdoc_alloc1(p); 211241675Suqs return(p); 212241675Suqs} 213241675Suqs 214241675Suqs 215241675Suqs/* 216241675Suqs * Climb back up the parse tree, validating open scopes. Mostly calls 217241675Suqs * through to macro_end() in macro.c. 218241675Suqs */ 219241675Suqsint 220241675Suqsmdoc_endparse(struct mdoc *m) 221241675Suqs{ 222241675Suqs 223241675Suqs assert( ! (MDOC_HALT & m->flags)); 224241675Suqs if (mdoc_macroend(m)) 225241675Suqs return(1); 226241675Suqs m->flags |= MDOC_HALT; 227241675Suqs return(0); 228241675Suqs} 229241675Suqs 230241675Suqsint 231241675Suqsmdoc_addeqn(struct mdoc *m, const struct eqn *ep) 232241675Suqs{ 233241675Suqs struct mdoc_node *n; 234241675Suqs 235241675Suqs assert( ! (MDOC_HALT & m->flags)); 236241675Suqs 237241675Suqs /* No text before an initial macro. */ 238241675Suqs 239241675Suqs if (SEC_NONE == m->lastnamed) { 240241675Suqs mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); 241241675Suqs return(1); 242241675Suqs } 243241675Suqs 244241675Suqs n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); 245241675Suqs n->eqn = ep; 246241675Suqs 247241675Suqs if ( ! node_append(m, n)) 248241675Suqs return(0); 249241675Suqs 250241675Suqs m->next = MDOC_NEXT_SIBLING; 251241675Suqs return(1); 252241675Suqs} 253241675Suqs 254241675Suqsint 255241675Suqsmdoc_addspan(struct mdoc *m, const struct tbl_span *sp) 256241675Suqs{ 257241675Suqs struct mdoc_node *n; 258241675Suqs 259241675Suqs assert( ! (MDOC_HALT & m->flags)); 260241675Suqs 261241675Suqs /* No text before an initial macro. */ 262241675Suqs 263241675Suqs if (SEC_NONE == m->lastnamed) { 264241675Suqs mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT); 265241675Suqs return(1); 266241675Suqs } 267241675Suqs 268241675Suqs n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL); 269241675Suqs n->span = sp; 270241675Suqs 271241675Suqs if ( ! node_append(m, n)) 272241675Suqs return(0); 273241675Suqs 274241675Suqs m->next = MDOC_NEXT_SIBLING; 275241675Suqs return(1); 276241675Suqs} 277241675Suqs 278241675Suqs 279241675Suqs/* 280241675Suqs * Main parse routine. Parses a single line -- really just hands off to 281241675Suqs * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 282241675Suqs */ 283241675Suqsint 284241675Suqsmdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) 285241675Suqs{ 286241675Suqs 287241675Suqs assert( ! (MDOC_HALT & m->flags)); 288241675Suqs 289241675Suqs m->flags |= MDOC_NEWLINE; 290241675Suqs 291241675Suqs /* 292241675Suqs * Let the roff nS register switch SYNOPSIS mode early, 293241675Suqs * such that the parser knows at all times 294241675Suqs * whether this mode is on or off. 295241675Suqs * Note that this mode is also switched by the Sh macro. 296241675Suqs */ 297241675Suqs if (roff_regisset(m->roff, REG_nS)) { 298241675Suqs if (roff_regget(m->roff, REG_nS)) 299241675Suqs m->flags |= MDOC_SYNOPSIS; 300241675Suqs else 301241675Suqs m->flags &= ~MDOC_SYNOPSIS; 302241675Suqs } 303241675Suqs 304241675Suqs return(mandoc_getcontrol(buf, &offs) ? 305241675Suqs mdoc_pmacro(m, ln, buf, offs) : 306241675Suqs mdoc_ptext(m, ln, buf, offs)); 307241675Suqs} 308241675Suqs 309241675Suqsint 310241675Suqsmdoc_macro(MACRO_PROT_ARGS) 311241675Suqs{ 312241675Suqs assert(tok < MDOC_MAX); 313241675Suqs 314241675Suqs /* If we're in the body, deny prologue calls. */ 315241675Suqs 316241675Suqs if (MDOC_PROLOGUE & mdoc_macros[tok].flags && 317241675Suqs MDOC_PBODY & m->flags) { 318241675Suqs mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY); 319241675Suqs return(1); 320241675Suqs } 321241675Suqs 322241675Suqs /* If we're in the prologue, deny "body" macros. */ 323241675Suqs 324241675Suqs if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && 325241675Suqs ! (MDOC_PBODY & m->flags)) { 326241675Suqs mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG); 327241675Suqs if (NULL == m->meta.msec) 328241675Suqs m->meta.msec = mandoc_strdup("1"); 329241675Suqs if (NULL == m->meta.title) 330241675Suqs m->meta.title = mandoc_strdup("UNKNOWN"); 331241675Suqs if (NULL == m->meta.vol) 332241675Suqs m->meta.vol = mandoc_strdup("LOCAL"); 333241675Suqs if (NULL == m->meta.os) 334241675Suqs m->meta.os = mandoc_strdup("LOCAL"); 335241675Suqs if (NULL == m->meta.date) 336241675Suqs m->meta.date = mandoc_normdate 337241675Suqs (m->parse, NULL, line, ppos); 338241675Suqs m->flags |= MDOC_PBODY; 339241675Suqs } 340241675Suqs 341241675Suqs return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf)); 342241675Suqs} 343241675Suqs 344241675Suqs 345241675Suqsstatic int 346241675Suqsnode_append(struct mdoc *mdoc, struct mdoc_node *p) 347241675Suqs{ 348241675Suqs 349241675Suqs assert(mdoc->last); 350241675Suqs assert(mdoc->first); 351241675Suqs assert(MDOC_ROOT != p->type); 352241675Suqs 353241675Suqs switch (mdoc->next) { 354241675Suqs case (MDOC_NEXT_SIBLING): 355241675Suqs mdoc->last->next = p; 356241675Suqs p->prev = mdoc->last; 357241675Suqs p->parent = mdoc->last->parent; 358241675Suqs break; 359241675Suqs case (MDOC_NEXT_CHILD): 360241675Suqs mdoc->last->child = p; 361241675Suqs p->parent = mdoc->last; 362241675Suqs break; 363241675Suqs default: 364241675Suqs abort(); 365241675Suqs /* NOTREACHED */ 366241675Suqs } 367241675Suqs 368241675Suqs p->parent->nchild++; 369241675Suqs 370241675Suqs /* 371241675Suqs * Copy over the normalised-data pointer of our parent. Not 372241675Suqs * everybody has one, but copying a null pointer is fine. 373241675Suqs */ 374241675Suqs 375241675Suqs switch (p->type) { 376241675Suqs case (MDOC_BODY): 377241675Suqs /* FALLTHROUGH */ 378241675Suqs case (MDOC_TAIL): 379241675Suqs /* FALLTHROUGH */ 380241675Suqs case (MDOC_HEAD): 381241675Suqs p->norm = p->parent->norm; 382241675Suqs break; 383241675Suqs default: 384241675Suqs break; 385241675Suqs } 386241675Suqs 387241675Suqs if ( ! mdoc_valid_pre(mdoc, p)) 388241675Suqs return(0); 389241675Suqs 390241675Suqs switch (p->type) { 391241675Suqs case (MDOC_HEAD): 392241675Suqs assert(MDOC_BLOCK == p->parent->type); 393241675Suqs p->parent->head = p; 394241675Suqs break; 395241675Suqs case (MDOC_TAIL): 396241675Suqs assert(MDOC_BLOCK == p->parent->type); 397241675Suqs p->parent->tail = p; 398241675Suqs break; 399241675Suqs case (MDOC_BODY): 400241675Suqs if (p->end) 401241675Suqs break; 402241675Suqs assert(MDOC_BLOCK == p->parent->type); 403241675Suqs p->parent->body = p; 404241675Suqs break; 405241675Suqs default: 406241675Suqs break; 407241675Suqs } 408241675Suqs 409241675Suqs mdoc->last = p; 410241675Suqs 411241675Suqs switch (p->type) { 412241675Suqs case (MDOC_TBL): 413241675Suqs /* FALLTHROUGH */ 414241675Suqs case (MDOC_TEXT): 415241675Suqs if ( ! mdoc_valid_post(mdoc)) 416241675Suqs return(0); 417241675Suqs break; 418241675Suqs default: 419241675Suqs break; 420241675Suqs } 421241675Suqs 422241675Suqs return(1); 423241675Suqs} 424241675Suqs 425241675Suqs 426241675Suqsstatic struct mdoc_node * 427241675Suqsnode_alloc(struct mdoc *m, int line, int pos, 428241675Suqs enum mdoct tok, enum mdoc_type type) 429241675Suqs{ 430241675Suqs struct mdoc_node *p; 431241675Suqs 432241675Suqs p = mandoc_calloc(1, sizeof(struct mdoc_node)); 433241675Suqs p->sec = m->lastsec; 434241675Suqs p->line = line; 435241675Suqs p->pos = pos; 436241675Suqs p->tok = tok; 437241675Suqs p->type = type; 438241675Suqs 439241675Suqs /* Flag analysis. */ 440241675Suqs 441241675Suqs if (MDOC_SYNOPSIS & m->flags) 442241675Suqs p->flags |= MDOC_SYNPRETTY; 443241675Suqs else 444241675Suqs p->flags &= ~MDOC_SYNPRETTY; 445241675Suqs if (MDOC_NEWLINE & m->flags) 446241675Suqs p->flags |= MDOC_LINE; 447241675Suqs m->flags &= ~MDOC_NEWLINE; 448241675Suqs 449241675Suqs return(p); 450241675Suqs} 451241675Suqs 452241675Suqs 453241675Suqsint 454241675Suqsmdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 455241675Suqs{ 456241675Suqs struct mdoc_node *p; 457241675Suqs 458241675Suqs p = node_alloc(m, line, pos, tok, MDOC_TAIL); 459241675Suqs if ( ! node_append(m, p)) 460241675Suqs return(0); 461241675Suqs m->next = MDOC_NEXT_CHILD; 462241675Suqs return(1); 463241675Suqs} 464241675Suqs 465241675Suqs 466241675Suqsint 467241675Suqsmdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 468241675Suqs{ 469241675Suqs struct mdoc_node *p; 470241675Suqs 471241675Suqs assert(m->first); 472241675Suqs assert(m->last); 473241675Suqs 474241675Suqs p = node_alloc(m, line, pos, tok, MDOC_HEAD); 475241675Suqs if ( ! node_append(m, p)) 476241675Suqs return(0); 477241675Suqs m->next = MDOC_NEXT_CHILD; 478241675Suqs return(1); 479241675Suqs} 480241675Suqs 481241675Suqs 482241675Suqsint 483241675Suqsmdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) 484241675Suqs{ 485241675Suqs struct mdoc_node *p; 486241675Suqs 487241675Suqs p = node_alloc(m, line, pos, tok, MDOC_BODY); 488241675Suqs if ( ! node_append(m, p)) 489241675Suqs return(0); 490241675Suqs m->next = MDOC_NEXT_CHILD; 491241675Suqs return(1); 492241675Suqs} 493241675Suqs 494241675Suqs 495241675Suqsint 496241675Suqsmdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, 497241675Suqs struct mdoc_node *body, enum mdoc_endbody end) 498241675Suqs{ 499241675Suqs struct mdoc_node *p; 500241675Suqs 501241675Suqs p = node_alloc(m, line, pos, tok, MDOC_BODY); 502241675Suqs p->pending = body; 503241675Suqs p->end = end; 504241675Suqs if ( ! node_append(m, p)) 505241675Suqs return(0); 506241675Suqs m->next = MDOC_NEXT_SIBLING; 507241675Suqs return(1); 508241675Suqs} 509241675Suqs 510241675Suqs 511241675Suqsint 512241675Suqsmdoc_block_alloc(struct mdoc *m, int line, int pos, 513241675Suqs enum mdoct tok, struct mdoc_arg *args) 514241675Suqs{ 515241675Suqs struct mdoc_node *p; 516241675Suqs 517241675Suqs p = node_alloc(m, line, pos, tok, MDOC_BLOCK); 518241675Suqs p->args = args; 519241675Suqs if (p->args) 520241675Suqs (args->refcnt)++; 521241675Suqs 522241675Suqs switch (tok) { 523241675Suqs case (MDOC_Bd): 524241675Suqs /* FALLTHROUGH */ 525241675Suqs case (MDOC_Bf): 526241675Suqs /* FALLTHROUGH */ 527241675Suqs case (MDOC_Bl): 528241675Suqs /* FALLTHROUGH */ 529241675Suqs case (MDOC_Rs): 530241675Suqs p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 531241675Suqs break; 532241675Suqs default: 533241675Suqs break; 534241675Suqs } 535241675Suqs 536241675Suqs if ( ! node_append(m, p)) 537241675Suqs return(0); 538241675Suqs m->next = MDOC_NEXT_CHILD; 539241675Suqs return(1); 540241675Suqs} 541241675Suqs 542241675Suqs 543241675Suqsint 544241675Suqsmdoc_elem_alloc(struct mdoc *m, int line, int pos, 545241675Suqs enum mdoct tok, struct mdoc_arg *args) 546241675Suqs{ 547241675Suqs struct mdoc_node *p; 548241675Suqs 549241675Suqs p = node_alloc(m, line, pos, tok, MDOC_ELEM); 550241675Suqs p->args = args; 551241675Suqs if (p->args) 552241675Suqs (args->refcnt)++; 553241675Suqs 554241675Suqs switch (tok) { 555241675Suqs case (MDOC_An): 556241675Suqs p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 557241675Suqs break; 558241675Suqs default: 559241675Suqs break; 560241675Suqs } 561241675Suqs 562241675Suqs if ( ! node_append(m, p)) 563241675Suqs return(0); 564241675Suqs m->next = MDOC_NEXT_CHILD; 565241675Suqs return(1); 566241675Suqs} 567241675Suqs 568241675Suqsint 569241675Suqsmdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) 570241675Suqs{ 571241675Suqs struct mdoc_node *n; 572241675Suqs 573241675Suqs n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); 574241675Suqs n->string = roff_strdup(m->roff, p); 575241675Suqs 576241675Suqs if ( ! node_append(m, n)) 577241675Suqs return(0); 578241675Suqs 579241675Suqs m->next = MDOC_NEXT_SIBLING; 580241675Suqs return(1); 581241675Suqs} 582241675Suqs 583241675Suqs 584241675Suqsstatic void 585241675Suqsmdoc_node_free(struct mdoc_node *p) 586241675Suqs{ 587241675Suqs 588241675Suqs if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) 589241675Suqs free(p->norm); 590241675Suqs if (p->string) 591241675Suqs free(p->string); 592241675Suqs if (p->args) 593241675Suqs mdoc_argv_free(p->args); 594241675Suqs free(p); 595241675Suqs} 596241675Suqs 597241675Suqs 598241675Suqsstatic void 599241675Suqsmdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) 600241675Suqs{ 601241675Suqs 602241675Suqs /* Adjust siblings. */ 603241675Suqs 604241675Suqs if (n->prev) 605241675Suqs n->prev->next = n->next; 606241675Suqs if (n->next) 607241675Suqs n->next->prev = n->prev; 608241675Suqs 609241675Suqs /* Adjust parent. */ 610241675Suqs 611241675Suqs if (n->parent) { 612241675Suqs n->parent->nchild--; 613241675Suqs if (n->parent->child == n) 614241675Suqs n->parent->child = n->prev ? n->prev : n->next; 615241675Suqs if (n->parent->last == n) 616241675Suqs n->parent->last = n->prev ? n->prev : NULL; 617241675Suqs } 618241675Suqs 619241675Suqs /* Adjust parse point, if applicable. */ 620241675Suqs 621241675Suqs if (m && m->last == n) { 622241675Suqs if (n->prev) { 623241675Suqs m->last = n->prev; 624241675Suqs m->next = MDOC_NEXT_SIBLING; 625241675Suqs } else { 626241675Suqs m->last = n->parent; 627241675Suqs m->next = MDOC_NEXT_CHILD; 628241675Suqs } 629241675Suqs } 630241675Suqs 631241675Suqs if (m && m->first == n) 632241675Suqs m->first = NULL; 633241675Suqs} 634241675Suqs 635241675Suqs 636241675Suqsvoid 637241675Suqsmdoc_node_delete(struct mdoc *m, struct mdoc_node *p) 638241675Suqs{ 639241675Suqs 640241675Suqs while (p->child) { 641241675Suqs assert(p->nchild); 642241675Suqs mdoc_node_delete(m, p->child); 643241675Suqs } 644241675Suqs assert(0 == p->nchild); 645241675Suqs 646241675Suqs mdoc_node_unlink(m, p); 647241675Suqs mdoc_node_free(p); 648241675Suqs} 649241675Suqs 650241675Suqs#if 0 651241675Suqs/* 652241675Suqs * Pre-treat a text line. 653241675Suqs * Text lines can consist of equations, which must be handled apart from 654241675Suqs * the regular text. 655241675Suqs * Thus, use this function to step through a line checking if it has any 656241675Suqs * equations embedded in it. 657241675Suqs * This must handle multiple equations AND equations that do not end at 658241675Suqs * the end-of-line, i.e., will re-enter in the next roff parse. 659241675Suqs */ 660241675Suqsstatic int 661241675Suqsmdoc_preptext(struct mdoc *m, int line, char *buf, int offs) 662241675Suqs{ 663241675Suqs char *start, *end; 664241675Suqs char delim; 665241675Suqs 666241675Suqs while ('\0' != buf[offs]) { 667241675Suqs /* Mark starting position if eqn is set. */ 668241675Suqs start = NULL; 669241675Suqs if ('\0' != (delim = roff_eqndelim(m->roff))) 670241675Suqs if (NULL != (start = strchr(buf + offs, delim))) 671241675Suqs *start++ = '\0'; 672241675Suqs 673241675Suqs /* Parse text as normal. */ 674241675Suqs if ( ! mdoc_ptext(m, line, buf, offs)) 675241675Suqs return(0); 676241675Suqs 677241675Suqs /* Continue only if an equation exists. */ 678241675Suqs if (NULL == start) 679241675Suqs break; 680241675Suqs 681241675Suqs /* Read past the end of the equation. */ 682241675Suqs offs += start - (buf + offs); 683241675Suqs assert(start == &buf[offs]); 684241675Suqs if (NULL != (end = strchr(buf + offs, delim))) { 685241675Suqs *end++ = '\0'; 686241675Suqs while (' ' == *end) 687241675Suqs end++; 688241675Suqs } 689241675Suqs 690241675Suqs /* Parse the equation itself. */ 691241675Suqs roff_openeqn(m->roff, NULL, line, offs, buf); 692241675Suqs 693241675Suqs /* Process a finished equation? */ 694241675Suqs if (roff_closeeqn(m->roff)) 695241675Suqs if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) 696241675Suqs return(0); 697241675Suqs offs += (end - (buf + offs)); 698241675Suqs } 699241675Suqs 700241675Suqs return(1); 701241675Suqs} 702241675Suqs#endif 703241675Suqs 704241675Suqs/* 705241675Suqs * Parse free-form text, that is, a line that does not begin with the 706241675Suqs * control character. 707241675Suqs */ 708241675Suqsstatic int 709241675Suqsmdoc_ptext(struct mdoc *m, int line, char *buf, int offs) 710241675Suqs{ 711241675Suqs char *c, *ws, *end; 712241675Suqs struct mdoc_node *n; 713241675Suqs 714241675Suqs /* No text before an initial macro. */ 715241675Suqs 716241675Suqs if (SEC_NONE == m->lastnamed) { 717241675Suqs mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT); 718241675Suqs return(1); 719241675Suqs } 720241675Suqs 721241675Suqs assert(m->last); 722241675Suqs n = m->last; 723241675Suqs 724241675Suqs /* 725241675Suqs * Divert directly to list processing if we're encountering a 726241675Suqs * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry 727241675Suqs * (a MDOC_BODY means it's already open, in which case we should 728241675Suqs * process within its context in the normal way). 729241675Suqs */ 730241675Suqs 731241675Suqs if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 732241675Suqs LIST_column == n->norm->Bl.type) { 733241675Suqs /* `Bl' is open without any children. */ 734241675Suqs m->flags |= MDOC_FREECOL; 735241675Suqs return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 736241675Suqs } 737241675Suqs 738241675Suqs if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 739241675Suqs NULL != n->parent && 740241675Suqs MDOC_Bl == n->parent->tok && 741241675Suqs LIST_column == n->parent->norm->Bl.type) { 742241675Suqs /* `Bl' has block-level `It' children. */ 743241675Suqs m->flags |= MDOC_FREECOL; 744241675Suqs return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); 745241675Suqs } 746241675Suqs 747241675Suqs /* 748241675Suqs * Search for the beginning of unescaped trailing whitespace (ws) 749241675Suqs * and for the first character not to be output (end). 750241675Suqs */ 751241675Suqs 752241675Suqs /* FIXME: replace with strcspn(). */ 753241675Suqs ws = NULL; 754241675Suqs for (c = end = buf + offs; *c; c++) { 755241675Suqs switch (*c) { 756241675Suqs case ' ': 757241675Suqs if (NULL == ws) 758241675Suqs ws = c; 759241675Suqs continue; 760241675Suqs case '\t': 761241675Suqs /* 762241675Suqs * Always warn about trailing tabs, 763241675Suqs * even outside literal context, 764241675Suqs * where they should be put on the next line. 765241675Suqs */ 766241675Suqs if (NULL == ws) 767241675Suqs ws = c; 768241675Suqs /* 769241675Suqs * Strip trailing tabs in literal context only; 770241675Suqs * outside, they affect the next line. 771241675Suqs */ 772241675Suqs if (MDOC_LITERAL & m->flags) 773241675Suqs continue; 774241675Suqs break; 775241675Suqs case '\\': 776241675Suqs /* Skip the escaped character, too, if any. */ 777241675Suqs if (c[1]) 778241675Suqs c++; 779241675Suqs /* FALLTHROUGH */ 780241675Suqs default: 781241675Suqs ws = NULL; 782241675Suqs break; 783241675Suqs } 784241675Suqs end = c + 1; 785241675Suqs } 786241675Suqs *end = '\0'; 787241675Suqs 788241675Suqs if (ws) 789241675Suqs mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); 790241675Suqs 791241675Suqs if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { 792241675Suqs mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN); 793241675Suqs 794241675Suqs /* 795241675Suqs * Insert a `sp' in the case of a blank line. Technically, 796241675Suqs * blank lines aren't allowed, but enough manuals assume this 797241675Suqs * behaviour that we want to work around it. 798241675Suqs */ 799241675Suqs if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL)) 800241675Suqs return(0); 801241675Suqs 802241675Suqs m->next = MDOC_NEXT_SIBLING; 803241675Suqs return(1); 804241675Suqs } 805241675Suqs 806241675Suqs if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) 807241675Suqs return(0); 808241675Suqs 809241675Suqs if (MDOC_LITERAL & m->flags) 810241675Suqs return(1); 811241675Suqs 812241675Suqs /* 813241675Suqs * End-of-sentence check. If the last character is an unescaped 814241675Suqs * EOS character, then flag the node as being the end of a 815241675Suqs * sentence. The front-end will know how to interpret this. 816241675Suqs */ 817241675Suqs 818241675Suqs assert(buf < end); 819241675Suqs 820241675Suqs if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) 821241675Suqs m->last->flags |= MDOC_EOS; 822241675Suqs 823241675Suqs return(1); 824241675Suqs} 825241675Suqs 826241675Suqs 827241675Suqs/* 828241675Suqs * Parse a macro line, that is, a line beginning with the control 829241675Suqs * character. 830241675Suqs */ 831241675Suqsstatic int 832241675Suqsmdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) 833241675Suqs{ 834241675Suqs enum mdoct tok; 835241675Suqs int i, sv; 836241675Suqs char mac[5]; 837241675Suqs struct mdoc_node *n; 838241675Suqs 839241675Suqs /* Empty post-control lines are ignored. */ 840241675Suqs 841241675Suqs if ('"' == buf[offs]) { 842241675Suqs mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); 843241675Suqs return(1); 844241675Suqs } else if ('\0' == buf[offs]) 845241675Suqs return(1); 846241675Suqs 847241675Suqs sv = offs; 848241675Suqs 849241675Suqs /* 850241675Suqs * Copy the first word into a nil-terminated buffer. 851241675Suqs * Stop copying when a tab, space, or eoln is encountered. 852241675Suqs */ 853241675Suqs 854241675Suqs i = 0; 855241675Suqs while (i < 4 && '\0' != buf[offs] && 856241675Suqs ' ' != buf[offs] && '\t' != buf[offs]) 857241675Suqs mac[i++] = buf[offs++]; 858241675Suqs 859241675Suqs mac[i] = '\0'; 860241675Suqs 861241675Suqs tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; 862241675Suqs 863241675Suqs if (MDOC_MAX == tok) { 864241675Suqs mandoc_vmsg(MANDOCERR_MACRO, m->parse, 865241675Suqs ln, sv, "%s", buf + sv - 1); 866241675Suqs return(1); 867241675Suqs } 868241675Suqs 869241675Suqs /* Disregard the first trailing tab, if applicable. */ 870241675Suqs 871241675Suqs if ('\t' == buf[offs]) 872241675Suqs offs++; 873241675Suqs 874241675Suqs /* Jump to the next non-whitespace word. */ 875241675Suqs 876241675Suqs while (buf[offs] && ' ' == buf[offs]) 877241675Suqs offs++; 878241675Suqs 879241675Suqs /* 880241675Suqs * Trailing whitespace. Note that tabs are allowed to be passed 881241675Suqs * into the parser as "text", so we only warn about spaces here. 882241675Suqs */ 883241675Suqs 884241675Suqs if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 885241675Suqs mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); 886241675Suqs 887241675Suqs /* 888241675Suqs * If an initial macro or a list invocation, divert directly 889241675Suqs * into macro processing. 890241675Suqs */ 891241675Suqs 892241675Suqs if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { 893241675Suqs if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 894241675Suqs goto err; 895241675Suqs return(1); 896241675Suqs } 897241675Suqs 898241675Suqs n = m->last; 899241675Suqs assert(m->last); 900241675Suqs 901241675Suqs /* 902241675Suqs * If the first macro of a `Bl -column', open an `It' block 903241675Suqs * context around the parsed macro. 904241675Suqs */ 905241675Suqs 906241675Suqs if (MDOC_Bl == n->tok && MDOC_BODY == n->type && 907241675Suqs LIST_column == n->norm->Bl.type) { 908241675Suqs m->flags |= MDOC_FREECOL; 909241675Suqs if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 910241675Suqs goto err; 911241675Suqs return(1); 912241675Suqs } 913241675Suqs 914241675Suqs /* 915241675Suqs * If we're following a block-level `It' within a `Bl -column' 916241675Suqs * context (perhaps opened in the above block or in ptext()), 917241675Suqs * then open an `It' block context around the parsed macro. 918241675Suqs */ 919241675Suqs 920241675Suqs if (MDOC_It == n->tok && MDOC_BLOCK == n->type && 921241675Suqs NULL != n->parent && 922241675Suqs MDOC_Bl == n->parent->tok && 923241675Suqs LIST_column == n->parent->norm->Bl.type) { 924241675Suqs m->flags |= MDOC_FREECOL; 925241675Suqs if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) 926241675Suqs goto err; 927241675Suqs return(1); 928241675Suqs } 929241675Suqs 930241675Suqs /* Normal processing of a macro. */ 931241675Suqs 932241675Suqs if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) 933241675Suqs goto err; 934241675Suqs 935241675Suqs return(1); 936241675Suqs 937241675Suqserr: /* Error out. */ 938241675Suqs 939241675Suqs m->flags |= MDOC_HALT; 940241675Suqs return(0); 941241675Suqs} 942241675Suqs 943241675Suqsenum mdelim 944241675Suqsmdoc_isdelim(const char *p) 945241675Suqs{ 946241675Suqs 947241675Suqs if ('\0' == p[0]) 948241675Suqs return(DELIM_NONE); 949241675Suqs 950241675Suqs if ('\0' == p[1]) 951241675Suqs switch (p[0]) { 952241675Suqs case('('): 953241675Suqs /* FALLTHROUGH */ 954241675Suqs case('['): 955241675Suqs return(DELIM_OPEN); 956241675Suqs case('|'): 957241675Suqs return(DELIM_MIDDLE); 958241675Suqs case('.'): 959241675Suqs /* FALLTHROUGH */ 960241675Suqs case(','): 961241675Suqs /* FALLTHROUGH */ 962241675Suqs case(';'): 963241675Suqs /* FALLTHROUGH */ 964241675Suqs case(':'): 965241675Suqs /* FALLTHROUGH */ 966241675Suqs case('?'): 967241675Suqs /* FALLTHROUGH */ 968241675Suqs case('!'): 969241675Suqs /* FALLTHROUGH */ 970241675Suqs case(')'): 971241675Suqs /* FALLTHROUGH */ 972241675Suqs case(']'): 973241675Suqs return(DELIM_CLOSE); 974241675Suqs default: 975241675Suqs return(DELIM_NONE); 976241675Suqs } 977241675Suqs 978241675Suqs if ('\\' != p[0]) 979241675Suqs return(DELIM_NONE); 980241675Suqs 981241675Suqs if (0 == strcmp(p + 1, ".")) 982241675Suqs return(DELIM_CLOSE); 983241675Suqs if (0 == strcmp(p + 1, "*(Ba")) 984241675Suqs return(DELIM_MIDDLE); 985241675Suqs 986241675Suqs return(DELIM_NONE); 987241675Suqs} 988