/* mdoc.c revision 1.152 */
1/* $OpenBSD: mdoc.c,v 1.152 2017/04/29 12:43:55 schwarze Exp $ */ 2/* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18#include <sys/types.h> 19 20#include <assert.h> 21#include <ctype.h> 22#include <stdarg.h> 23#include <stdio.h> 24#include <stdlib.h> 25#include <string.h> 26#include <time.h> 27 28#include "mandoc_aux.h" 29#include "mandoc.h" 30#include "roff.h" 31#include "mdoc.h" 32#include "libmandoc.h" 33#include "roff_int.h" 34#include "libmdoc.h" 35 36const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 37 "split", "nosplit", "ragged", 38 "unfilled", "literal", "file", 39 "offset", "bullet", "dash", 40 "hyphen", "item", "enum", 41 "tag", "diag", "hang", 42 "ohang", "inset", "column", 43 "width", "compact", "std", 44 "filled", "words", "emphasis", 45 "symbolic", "nested", "centered" 46}; 47const char * const *mdoc_argnames = __mdoc_argnames; 48 49static int mdoc_ptext(struct roff_man *, int, char *, int); 50static int mdoc_pmacro(struct roff_man *, int, char *, int); 51 52 53/* 54 * Main parse routine. Parses a single line -- really just hands off to 55 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 
56 */ 57int 58mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 59{ 60 61 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 62 mdoc->flags |= MDOC_NEWLINE; 63 64 /* 65 * Let the roff nS register switch SYNOPSIS mode early, 66 * such that the parser knows at all times 67 * whether this mode is on or off. 68 * Note that this mode is also switched by the Sh macro. 69 */ 70 if (roff_getreg(mdoc->roff, "nS")) 71 mdoc->flags |= MDOC_SYNOPSIS; 72 else 73 mdoc->flags &= ~MDOC_SYNOPSIS; 74 75 return roff_getcontrol(mdoc->roff, buf, &offs) ? 76 mdoc_pmacro(mdoc, ln, buf, offs) : 77 mdoc_ptext(mdoc, ln, buf, offs); 78} 79 80void 81mdoc_macro(MACRO_PROT_ARGS) 82{ 83 assert(tok >= MDOC_Dd && tok < MDOC_MAX); 84 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 85} 86 87void 88mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) 89{ 90 struct roff_node *p; 91 92 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 93 roff_node_append(mdoc, p); 94 mdoc->next = ROFF_NEXT_CHILD; 95} 96 97struct roff_node * 98mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, 99 enum roff_tok tok, struct roff_node *body) 100{ 101 struct roff_node *p; 102 103 body->flags |= NODE_ENDED; 104 body->parent->flags |= NODE_ENDED; 105 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 106 p->body = body; 107 p->norm = body->norm; 108 p->end = ENDBODY_SPACE; 109 roff_node_append(mdoc, p); 110 mdoc->next = ROFF_NEXT_SIBLING; 111 return p; 112} 113 114struct roff_node * 115mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 116 enum roff_tok tok, struct mdoc_arg *args) 117{ 118 struct roff_node *p; 119 120 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 121 p->args = args; 122 if (p->args) 123 (args->refcnt)++; 124 125 switch (tok) { 126 case MDOC_Bd: 127 case MDOC_Bf: 128 case MDOC_Bl: 129 case MDOC_En: 130 case MDOC_Rs: 131 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 132 break; 133 default: 134 break; 135 } 136 
roff_node_append(mdoc, p); 137 mdoc->next = ROFF_NEXT_CHILD; 138 return p; 139} 140 141void 142mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 143 enum roff_tok tok, struct mdoc_arg *args) 144{ 145 struct roff_node *p; 146 147 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 148 p->args = args; 149 if (p->args) 150 (args->refcnt)++; 151 152 switch (tok) { 153 case MDOC_An: 154 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 155 break; 156 default: 157 break; 158 } 159 roff_node_append(mdoc, p); 160 mdoc->next = ROFF_NEXT_CHILD; 161} 162 163void 164mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 165{ 166 167 roff_node_unlink(mdoc, p); 168 p->prev = p->next = NULL; 169 roff_node_append(mdoc, p); 170} 171 172/* 173 * Parse free-form text, that is, a line that does not begin with the 174 * control character. 175 */ 176static int 177mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 178{ 179 struct roff_node *n; 180 char *c, *ws, *end; 181 182 n = mdoc->last; 183 184 /* 185 * If a column list contains plain text, assume an implicit item 186 * macro. This can happen one or more times at the beginning 187 * of such a list, intermixed with non-It mdoc macros and with 188 * nodes generated on the roff level, for example by tbl. 189 */ 190 191 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 192 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 193 (n->parent != NULL && n->parent->tok == MDOC_Bl && 194 n->parent->norm->Bl.type == LIST_column)) { 195 mdoc->flags |= MDOC_FREECOL; 196 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 197 return 1; 198 } 199 200 /* 201 * Search for the beginning of unescaped trailing whitespace (ws) 202 * and for the first character not to be output (end). 203 */ 204 205 /* FIXME: replace with strcspn(). 
*/ 206 ws = NULL; 207 for (c = end = buf + offs; *c; c++) { 208 switch (*c) { 209 case ' ': 210 if (NULL == ws) 211 ws = c; 212 continue; 213 case '\t': 214 /* 215 * Always warn about trailing tabs, 216 * even outside literal context, 217 * where they should be put on the next line. 218 */ 219 if (NULL == ws) 220 ws = c; 221 /* 222 * Strip trailing tabs in literal context only; 223 * outside, they affect the next line. 224 */ 225 if (MDOC_LITERAL & mdoc->flags) 226 continue; 227 break; 228 case '\\': 229 /* Skip the escaped character, too, if any. */ 230 if (c[1]) 231 c++; 232 /* FALLTHROUGH */ 233 default: 234 ws = NULL; 235 break; 236 } 237 end = c + 1; 238 } 239 *end = '\0'; 240 241 if (ws) 242 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 243 line, (int)(ws-buf), NULL); 244 245 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 246 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 247 line, (int)(c - buf), NULL); 248 249 /* 250 * Insert a `sp' in the case of a blank line. Technically, 251 * blank lines aren't allowed, but enough manuals assume this 252 * behaviour that we want to work around it. 253 */ 254 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 255 mdoc->last->flags |= NODE_VALID | NODE_ENDED; 256 mdoc->next = ROFF_NEXT_SIBLING; 257 return 1; 258 } 259 260 roff_word_alloc(mdoc, line, offs, buf+offs); 261 262 if (mdoc->flags & MDOC_LITERAL) 263 return 1; 264 265 /* 266 * End-of-sentence check. If the last character is an unescaped 267 * EOS character, then flag the node as being the end of a 268 * sentence. The front-end will know how to interpret this. 269 */ 270 271 assert(buf < end); 272 273 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 274 mdoc->last->flags |= NODE_EOS; 275 276 for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 277 if (c - buf < offs + 2) 278 continue; 279 if (end - c < 4) 280 break; 281 if (isalpha((unsigned char)c[-2]) && 282 isalpha((unsigned char)c[-1]) && 283 c[1] == ' ' && 284 isupper((unsigned char)(c[2] == ' ' ? 
c[3] : c[2])) && 285 (c[-2] != 'n' || c[-1] != 'c') && 286 (c[-2] != 'v' || c[-1] != 's')) 287 mandoc_msg(MANDOCERR_EOS, mdoc->parse, 288 line, (int)(c - buf), NULL); 289 } 290 291 return 1; 292} 293 294/* 295 * Parse a macro line, that is, a line beginning with the control 296 * character. 297 */ 298static int 299mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 300{ 301 struct roff_node *n; 302 const char *cp; 303 size_t sz; 304 enum roff_tok tok; 305 int sv; 306 307 /* Determine the line macro. */ 308 309 sv = offs; 310 tok = TOKEN_NONE; 311 for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) 312 offs++; 313 if (sz == 2 || sz == 3) 314 tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); 315 if (tok == TOKEN_NONE) { 316 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 317 ln, sv, buf + sv - 1); 318 return 1; 319 } 320 321 /* Skip a leading escape sequence or tab. */ 322 323 switch (buf[offs]) { 324 case '\\': 325 cp = buf + offs + 1; 326 mandoc_escape(&cp, NULL, NULL); 327 offs = cp - buf; 328 break; 329 case '\t': 330 offs++; 331 break; 332 default: 333 break; 334 } 335 336 /* Jump to the next non-whitespace word. */ 337 338 while (buf[offs] == ' ') 339 offs++; 340 341 /* 342 * Trailing whitespace. Note that tabs are allowed to be passed 343 * into the parser as "text", so we only warn about spaces here. 344 */ 345 346 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 347 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 348 ln, offs - 1, NULL); 349 350 /* 351 * If an initial macro or a list invocation, divert directly 352 * into macro processing. 353 */ 354 355 n = mdoc->last; 356 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 357 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 358 return 1; 359 } 360 361 /* 362 * If a column list contains a non-It macro, assume an implicit 363 * item macro. 
This can happen one or more times at the 364 * beginning of such a list, intermixed with text lines and 365 * with nodes generated on the roff level, for example by tbl. 366 */ 367 368 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 369 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 370 (n->parent != NULL && n->parent->tok == MDOC_Bl && 371 n->parent->norm->Bl.type == LIST_column)) { 372 mdoc->flags |= MDOC_FREECOL; 373 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 374 return 1; 375 } 376 377 /* Normal processing of a macro. */ 378 379 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 380 381 /* In quick mode (for mandocdb), abort after the NAME section. */ 382 383 if (mdoc->quick && MDOC_Sh == tok && 384 SEC_NAME != mdoc->last->sec) 385 return 2; 386 387 return 1; 388} 389 390enum mdelim 391mdoc_isdelim(const char *p) 392{ 393 394 if ('\0' == p[0]) 395 return DELIM_NONE; 396 397 if ('\0' == p[1]) 398 switch (p[0]) { 399 case '(': 400 case '[': 401 return DELIM_OPEN; 402 case '|': 403 return DELIM_MIDDLE; 404 case '.': 405 case ',': 406 case ';': 407 case ':': 408 case '?': 409 case '!': 410 case ')': 411 case ']': 412 return DELIM_CLOSE; 413 default: 414 return DELIM_NONE; 415 } 416 417 if ('\\' != p[0]) 418 return DELIM_NONE; 419 420 if (0 == strcmp(p + 1, ".")) 421 return DELIM_CLOSE; 422 if (0 == strcmp(p + 1, "fR|\\fP")) 423 return DELIM_MIDDLE; 424 425 return DELIM_NONE; 426} 427 428void 429mdoc_validate(struct roff_man *mdoc) 430{ 431 432 mdoc->last = mdoc->first; 433 mdoc_node_validate(mdoc); 434 mdoc_state_reset(mdoc); 435} 436