mdoc.c revision 1.138
1/* $OpenBSD: mdoc.c,v 1.138 2015/04/19 14:25:05 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/types.h> 19 20#include <assert.h> 21#include <ctype.h> 22#include <stdarg.h> 23#include <stdio.h> 24#include <stdlib.h> 25#include <string.h> 26#include <time.h> 27 28#include "mandoc_aux.h" 29#include "mandoc.h" 30#include "roff.h" 31#include "mdoc.h" 32#include "libmandoc.h" 33#include "roff_int.h" 34#include "libmdoc.h" 35 36const char *const __mdoc_macronames[MDOC_MAX + 1] = { 37 "text", 38 "Ap", "Dd", "Dt", "Os", 39 "Sh", "Ss", "Pp", "D1", 40 "Dl", "Bd", "Ed", "Bl", 41 "El", "It", "Ad", "An", 42 "Ar", "Cd", "Cm", "Dv", 43 "Er", "Ev", "Ex", "Fa", 44 "Fd", "Fl", "Fn", "Ft", 45 "Ic", "In", "Li", "Nd", 46 "Nm", "Op", "Ot", "Pa", 47 "Rv", "St", "Va", "Vt", 48 "Xr", "%A", "%B", "%D", 49 "%I", "%J", "%N", "%O", 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 "Brc", "%C", "Es", "En", 67 "Dx", "%Q", "br", "sp", 68 "%U", "Ta", "ll", 69}; 70 71const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 72 "split", "nosplit", "ragged", 73 "unfilled", "literal", "file", 74 "offset", "bullet", "dash", 75 "hyphen", "item", "enum", 76 "tag", "diag", "hang", 77 "ohang", "inset", "column", 78 "width", "compact", "std", 79 "filled", "words", "emphasis", 80 "symbolic", "nested", "centered" 81 }; 82 83const char * const *mdoc_macronames = __mdoc_macronames + 1; 84const char * const *mdoc_argnames = __mdoc_argnames; 85 86static int mdoc_ptext(struct roff_man *, int, char *, int); 87static int mdoc_pmacro(struct roff_man *, int, char *, int); 88 89 90void 91mdoc_endparse(struct roff_man *mdoc) 92{ 93 94 mdoc_macroend(mdoc); 95} 96 97/* 98 * Main parse routine. Parses a single line -- really just hands off to 99 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 100 */ 101int 102mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 103{ 104 105 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 106 mdoc->flags |= MDOC_NEWLINE; 107 108 /* 109 * Let the roff nS register switch SYNOPSIS mode early, 110 * such that the parser knows at all times 111 * whether this mode is on or off. 112 * Note that this mode is also switched by the Sh macro. 113 */ 114 if (roff_getreg(mdoc->roff, "nS")) 115 mdoc->flags |= MDOC_SYNOPSIS; 116 else 117 mdoc->flags &= ~MDOC_SYNOPSIS; 118 119 return(roff_getcontrol(mdoc->roff, buf, &offs) ? 120 mdoc_pmacro(mdoc, ln, buf, offs) : 121 mdoc_ptext(mdoc, ln, buf, offs)); 122} 123 124void 125mdoc_macro(MACRO_PROT_ARGS) 126{ 127 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 128 129 if (mdoc->flags & MDOC_PBODY) { 130 if (tok == MDOC_Dt) { 131 mandoc_vmsg(MANDOCERR_DT_LATE, 132 mdoc->parse, line, ppos, 133 "Dt %s", buf + *pos); 134 return; 135 } 136 } else if ( ! (mdoc_macros[tok].flags & MDOC_PROLOGUE)) { 137 if (mdoc->meta.title == NULL) { 138 mandoc_vmsg(MANDOCERR_DT_NOTITLE, 139 mdoc->parse, line, ppos, "%s %s", 140 mdoc_macronames[tok], buf + *pos); 141 mdoc->meta.title = mandoc_strdup("UNTITLED"); 142 } 143 if (NULL == mdoc->meta.vol) 144 mdoc->meta.vol = mandoc_strdup("LOCAL"); 145 mdoc->flags |= MDOC_PBODY; 146 } 147 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 148} 149 150void 151mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 152{ 153 struct roff_node *p; 154 155 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 156 roff_node_append(mdoc, p); 157 mdoc->next = ROFF_NEXT_CHILD; 158} 159 160struct roff_node * 161mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 162 struct roff_node *body, enum mdoc_endbody end) 163{ 164 struct roff_node *p; 165 166 body->flags |= MDOC_ENDED; 167 body->parent->flags |= MDOC_ENDED; 168 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 169 p->body = body; 170 p->norm = body->norm; 171 p->end = end; 172 roff_node_append(mdoc, p); 173 mdoc->next = ROFF_NEXT_SIBLING; 174 return(p); 175} 176 177struct roff_node * 178mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 179 int tok, struct mdoc_arg *args) 180{ 181 struct roff_node *p; 182 183 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 184 p->args = args; 185 if (p->args) 186 (args->refcnt)++; 187 188 switch (tok) { 189 case MDOC_Bd: 190 /* FALLTHROUGH */ 191 case MDOC_Bf: 192 /* FALLTHROUGH */ 193 case MDOC_Bl: 194 /* FALLTHROUGH */ 195 case MDOC_En: 196 /* FALLTHROUGH */ 197 case MDOC_Rs: 198 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 199 break; 200 default: 201 break; 202 } 203 roff_node_append(mdoc, p); 204 mdoc->next = ROFF_NEXT_CHILD; 205 return(p); 206} 207 208void 209mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 210 int tok, struct mdoc_arg *args) 211{ 212 struct roff_node *p; 213 214 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 215 p->args = args; 216 if (p->args) 217 (args->refcnt)++; 218 219 switch (tok) { 220 case MDOC_An: 221 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 222 break; 223 default: 224 break; 225 } 226 roff_node_append(mdoc, p); 227 mdoc->next = ROFF_NEXT_CHILD; 228} 229 230void 231mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 232{ 233 234 roff_node_unlink(mdoc, p); 235 roff_node_append(mdoc, p); 236} 237 238/* 239 * Parse free-form text, that is, a line that does not begin with the 240 * control character. 241 */ 242static int 243mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 244{ 245 struct roff_node *n; 246 char *c, *ws, *end; 247 248 assert(mdoc->last); 249 n = mdoc->last; 250 251 /* 252 * Divert directly to list processing if we're encountering a 253 * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry 254 * (a ROFFT_BODY means it's already open, in which case we should 255 * process within its context in the normal way). 256 */ 257 258 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 259 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 260 /* `Bl' is open without any children. */ 261 mdoc->flags |= MDOC_FREECOL; 262 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 263 return(1); 264 } 265 266 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 267 NULL != n->parent && 268 MDOC_Bl == n->parent->tok && 269 LIST_column == n->parent->norm->Bl.type) { 270 /* `Bl' has block-level `It' children. */ 271 mdoc->flags |= MDOC_FREECOL; 272 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 273 return(1); 274 } 275 276 /* 277 * Search for the beginning of unescaped trailing whitespace (ws) 278 * and for the first character not to be output (end). 279 */ 280 281 /* FIXME: replace with strcspn(). */ 282 ws = NULL; 283 for (c = end = buf + offs; *c; c++) { 284 switch (*c) { 285 case ' ': 286 if (NULL == ws) 287 ws = c; 288 continue; 289 case '\t': 290 /* 291 * Always warn about trailing tabs, 292 * even outside literal context, 293 * where they should be put on the next line. 294 */ 295 if (NULL == ws) 296 ws = c; 297 /* 298 * Strip trailing tabs in literal context only; 299 * outside, they affect the next line. 300 */ 301 if (MDOC_LITERAL & mdoc->flags) 302 continue; 303 break; 304 case '\\': 305 /* Skip the escaped character, too, if any. */ 306 if (c[1]) 307 c++; 308 /* FALLTHROUGH */ 309 default: 310 ws = NULL; 311 break; 312 } 313 end = c + 1; 314 } 315 *end = '\0'; 316 317 if (ws) 318 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 319 line, (int)(ws-buf), NULL); 320 321 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 322 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 323 line, (int)(c - buf), NULL); 324 325 /* 326 * Insert a `sp' in the case of a blank line. Technically, 327 * blank lines aren't allowed, but enough manuals assume this 328 * behaviour that we want to work around it. 329 */ 330 mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL); 331 mdoc->next = ROFF_NEXT_SIBLING; 332 mdoc_valid_post(mdoc); 333 return(1); 334 } 335 336 roff_word_alloc(mdoc, line, offs, buf+offs); 337 338 if (mdoc->flags & MDOC_LITERAL) 339 return(1); 340 341 /* 342 * End-of-sentence check. If the last character is an unescaped 343 * EOS character, then flag the node as being the end of a 344 * sentence. The front-end will know how to interpret this. 345 */ 346 347 assert(buf < end); 348 349 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 350 mdoc->last->flags |= MDOC_EOS; 351 return(1); 352} 353 354/* 355 * Parse a macro line, that is, a line beginning with the control 356 * character. 357 */ 358static int 359mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 360{ 361 struct roff_node *n; 362 const char *cp; 363 int tok; 364 int i, sv; 365 char mac[5]; 366 367 sv = offs; 368 369 /* 370 * Copy the first word into a nil-terminated buffer. 371 * Stop when a space, tab, escape, or eoln is encountered. 372 */ 373 374 i = 0; 375 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 376 mac[i++] = buf[offs++]; 377 378 mac[i] = '\0'; 379 380 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 381 382 if (tok == TOKEN_NONE) { 383 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 384 ln, sv, buf + sv - 1); 385 return(1); 386 } 387 388 /* Skip a leading escape sequence or tab. */ 389 390 switch (buf[offs]) { 391 case '\\': 392 cp = buf + offs + 1; 393 mandoc_escape(&cp, NULL, NULL); 394 offs = cp - buf; 395 break; 396 case '\t': 397 offs++; 398 break; 399 default: 400 break; 401 } 402 403 /* Jump to the next non-whitespace word. */ 404 405 while (buf[offs] && ' ' == buf[offs]) 406 offs++; 407 408 /* 409 * Trailing whitespace. Note that tabs are allowed to be passed 410 * into the parser as "text", so we only warn about spaces here. 411 */ 412 413 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 414 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 415 ln, offs - 1, NULL); 416 417 /* 418 * If an initial macro or a list invocation, divert directly 419 * into macro processing. 420 */ 421 422 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 423 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 424 return(1); 425 } 426 427 n = mdoc->last; 428 assert(mdoc->last); 429 430 /* 431 * If the first macro of a `Bl -column', open an `It' block 432 * context around the parsed macro. 433 */ 434 435 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 436 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 437 mdoc->flags |= MDOC_FREECOL; 438 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 439 return(1); 440 } 441 442 /* 443 * If we're following a block-level `It' within a `Bl -column' 444 * context (perhaps opened in the above block or in ptext()), 445 * then open an `It' block context around the parsed macro. 446 */ 447 448 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 449 NULL != n->parent && 450 MDOC_Bl == n->parent->tok && 451 LIST_column == n->parent->norm->Bl.type) { 452 mdoc->flags |= MDOC_FREECOL; 453 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 454 return(1); 455 } 456 457 /* Normal processing of a macro. */ 458 459 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 460 461 /* In quick mode (for mandocdb), abort after the NAME section. */ 462 463 if (mdoc->quick && MDOC_Sh == tok && 464 SEC_NAME != mdoc->last->sec) 465 return(2); 466 467 return(1); 468} 469 470enum mdelim 471mdoc_isdelim(const char *p) 472{ 473 474 if ('\0' == p[0]) 475 return(DELIM_NONE); 476 477 if ('\0' == p[1]) 478 switch (p[0]) { 479 case '(': 480 /* FALLTHROUGH */ 481 case '[': 482 return(DELIM_OPEN); 483 case '|': 484 return(DELIM_MIDDLE); 485 case '.': 486 /* FALLTHROUGH */ 487 case ',': 488 /* FALLTHROUGH */ 489 case ';': 490 /* FALLTHROUGH */ 491 case ':': 492 /* FALLTHROUGH */ 493 case '?': 494 /* FALLTHROUGH */ 495 case '!': 496 /* FALLTHROUGH */ 497 case ')': 498 /* FALLTHROUGH */ 499 case ']': 500 return(DELIM_CLOSE); 501 default: 502 return(DELIM_NONE); 503 } 504 505 if ('\\' != p[0]) 506 return(DELIM_NONE); 507 508 if (0 == strcmp(p + 1, ".")) 509 return(DELIM_CLOSE); 510 if (0 == strcmp(p + 1, "fR|\\fP")) 511 return(DELIM_MIDDLE); 512 513 return(DELIM_NONE); 514} 515 516void 517mdoc_deroff(char **dest, const struct roff_node *n) 518{ 519 char *cp; 520 size_t sz; 521 522 if (n->type != ROFFT_TEXT) { 523 for (n = n->child; n; n = n->next) 524 mdoc_deroff(dest, n); 525 return; 526 } 527 528 /* Skip leading whitespace. */ 529 530 for (cp = n->string; '\0' != *cp; cp++) 531 if (0 == isspace((unsigned char)*cp)) 532 break; 533 534 /* Skip trailing whitespace. */ 535 536 for (sz = strlen(cp); sz; sz--) 537 if (0 == isspace((unsigned char)cp[sz-1])) 538 break; 539 540 /* Skip empty strings. */ 541 542 if (0 == sz) 543 return; 544 545 if (NULL == *dest) { 546 *dest = mandoc_strndup(cp, sz); 547 return; 548 } 549 550 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 551 free(*dest); 552 *dest = cp; 553} 554