1/* $OpenBSD: roff.c,v 1.272 2023/10/24 20:30:49 schwarze Exp $ */
2/*
3 * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stddef.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc_aux.h"
32#include "mandoc_ohash.h"
33#include "mandoc.h"
34#include "roff.h"
35#include "mandoc_parse.h"
36#include "libmandoc.h"
37#include "roff_int.h"
38#include "tbl_parse.h"
39#include "eqn_parse.h"
40
/*
 * Maximum number of string expansions per line, to break infinite loops.
 */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies a bit of its own, so that several types
 * can be combined into a single mask.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask matching any of the four kinds of definitions above. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
/* Not contained in ROFFDEF_ANY. */
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52
53/* --- data types --------------------------------------------------------- */
54
/*
 * An incredibly-simple string buffer:
 * a pointer to the bytes together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the string, rename, and multi-byte translation
 * tables of struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* content stored under that name */
	struct roffkv	*next; /* next in list */
};
71
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* numeric value of the register */
	int		 step; /* presumably the auto-increment step - confirm */
	struct roffreg	*next; /* next register in the list */
};
81
/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built by
 * roffhash_alloc(), which allocates each entry large enough to hold
 * the name including the terminating NUL and uses the name as the key.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* flexible array member, must remain last */
};
89
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts form the array roff.mstack; roff_free() releases
 * the argv member of every element of that array.
 */
struct	mctx {
	char		**argv; /* argument vector, heap-allocated */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv - confirm */
};
99
/*
 * The state of one roff(7) parser.
 * Created by roff_alloc(), cleared and reinitialised by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack, -1 while empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, -1 while empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
127
/*
 * A macro definition, condition, or ignored block.
 * The nodes form a stack linked through the parent pointers, with
 * roff.last pointing to the innermost node.  They are created by
 * roffnode_push() and destroyed by roffnode_pop(), which also
 * releases the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
141
/*
 * The parameter list shared by all functions of the type roffproc.
 * Keeping it in a macro lets prototypes and definitions stay in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a function taking the ROFF_ARGS parameter list. */
typedef	int (*roffproc)(ROFF_ARGS);
151
/*
 * The handlers associated with one request.
 * The roffs[] table contains one object of this type per token
 * and initialises the members by position, so their order matters.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* flags below */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
159
/*
 * One entry of the predefs[] table of predefined strings.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one initialiser for a struct predef, including
 * the trailing comma required inside an array initialiser.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167
168/* --- function prototypes ------------------------------------------------ */
169
/*
 * Forward declarations of the functions local to this file.
 * Those declared with ROFF_ARGS take the common parameter list
 * of the type roffproc.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
256
257/* --- constant data ------------------------------------------------------ */
258
/* Flag bits related to the parsing of numbers. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
261
/*
 * Names of requests and macros, indexed by token number.
 * A NULL entry marks a token without a name; roffhash_alloc()
 * skips such entries when building a lookup table.
 * The position of each name matters: for the requests, it has
 * to agree with the position of the handlers in roffs[].
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,	/* slot labelled ROFF_MAX in roffs[] */
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	/* The NULLs after "." line up with the two unlabelled roffs[] slots. */
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	/* NOTE(review): presumably the mdoc(7) macros start here - confirm. */
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	/* NOTE(review): presumably the man(7) macros start here - confirm. */
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		"MR",
	NULL
};
/* Read-only view of the table above. */
const	char *const *roff_name = __roff_name;
372
/*
 * Handlers for the roff requests, indexed by token number.
 * In each entry, the members are, in this order: the handler for
 * the request itself (proc), the handler for child text (text),
 * the handler for child macros (sub), and the flags; see struct
 * roffmac.  The order of the entries has to agree with the order
 * of the names in __roff_name[].
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* renamed request; presumably ROFF_RENAMED */
	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro; presumably ROFF_USERDEF */
};
619
/*
 * Array of injected predefined strings.
 * The initialisers are pulled in from predefs.in.
 * NOTE(review): that file presumably consists of PREDEF(name, str)
 * lines, each expanding to one struct predef initialiser, and
 * PREDEFS_MAX presumably has to match their number - confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
625
/*
 * File-scope parser state that is not part of struct roff.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
630
631
632/* --- request table ------------------------------------------------------ */
633
634struct ohash *
635roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636{
637	struct ohash	*htab;
638	struct roffreq	*req;
639	enum roff_tok	 tok;
640	size_t		 sz;
641	unsigned int	 slot;
642
643	htab = mandoc_malloc(sizeof(*htab));
644	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645
646	for (tok = mintok; tok < maxtok; tok++) {
647		if (roff_name[tok] == NULL)
648			continue;
649		sz = strlen(roff_name[tok]);
650		req = mandoc_malloc(sizeof(*req) + sz + 1);
651		req->tok = tok;
652		memcpy(req->name, roff_name[tok], sz + 1);
653		slot = ohash_qlookup(htab, req->name);
654		ohash_insert(htab, slot, req);
655	}
656	return htab;
657}
658
/*
 * Destroy a table built by roffhash_alloc():
 * free every entry, then the table itself.
 * Passing a NULL pointer is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
673
674enum roff_tok
675roffhash_find(struct ohash *htab, const char *name, size_t sz)
676{
677	struct roffreq	*req;
678	const char	*end;
679
680	if (sz) {
681		end = name + sz;
682		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683	} else
684		req = ohash_find(htab, ohash_qlookup(htab, name));
685	return req == NULL ? TOKEN_NONE : req->tok;
686}
687
688/* --- stack of request blocks -------------------------------------------- */
689
690/*
691 * Pop the current node off of the stack of roff instructions currently
692 * pending.  Return 1 if it is a loop or 0 otherwise.
693 */
694static int
695roffnode_pop(struct roff *r)
696{
697	struct roffnode	*p;
698	int		 inloop;
699
700	p = r->last;
701	inloop = p->tok == ROFF_while;
702	r->last = p->parent;
703	free(p->name);
704	free(p->end);
705	free(p);
706	return inloop;
707}
708
709/*
710 * Push a roff node onto the instruction stack.  This must later be
711 * removed with roffnode_pop().
712 */
713static void
714roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715		int line, int col)
716{
717	struct roffnode	*p;
718
719	p = mandoc_calloc(1, sizeof(struct roffnode));
720	p->tok = tok;
721	if (name)
722		p->name = mandoc_strdup(name);
723	p->parent = r->last;
724	p->line = line;
725	p->col = col;
726	p->rule = p->parent ? p->parent->rule : 0;
727
728	r->last = p;
729}
730
731/* --- roff parser state data management ---------------------------------- */
732
733static void
734roff_free1(struct roff *r)
735{
736	int		 i;
737
738	tbl_free(r->first_tbl);
739	r->first_tbl = r->last_tbl = r->tbl = NULL;
740
741	eqn_free(r->last_eqn);
742	r->last_eqn = r->eqn = NULL;
743
744	while (r->mstackpos >= 0)
745		roff_userret(r);
746
747	while (r->last)
748		roffnode_pop(r);
749
750	free (r->rstack);
751	r->rstack = NULL;
752	r->rstacksz = 0;
753	r->rstackpos = -1;
754
755	roff_freereg(r->regtab);
756	r->regtab = NULL;
757
758	roff_freestr(r->strtab);
759	roff_freestr(r->rentab);
760	roff_freestr(r->xmbtab);
761	r->strtab = r->rentab = r->xmbtab = NULL;
762
763	if (r->xtab)
764		for (i = 0; i < 128; i++)
765			free(r->xtab[i].p);
766	free(r->xtab);
767	r->xtab = NULL;
768}
769
770void
771roff_reset(struct roff *r)
772{
773	roff_free1(r);
774	r->options |= MPARSE_COMMENT;
775	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
776	r->control = '\0';
777	r->escape = '\\';
778	roffce_lines = 0;
779	roffce_node = NULL;
780	roffit_lines = 0;
781	roffit_macro = NULL;
782}
783
784void
785roff_free(struct roff *r)
786{
787	int		 i;
788
789	roff_free1(r);
790	for (i = 0; i < r->mstacksz; i++)
791		free(r->mstack[i].argv);
792	free(r->mstack);
793	roffhash_free(r->reqtab);
794	free(r);
795}
796
797struct roff *
798roff_alloc(int options)
799{
800	struct roff	*r;
801
802	r = mandoc_calloc(1, sizeof(struct roff));
803	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
804	r->options = options | MPARSE_COMMENT;
805	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
806	r->mstackpos = -1;
807	r->rstackpos = -1;
808	r->escape = '\\';
809	return r;
810}
811
812/* --- syntax tree state data management ---------------------------------- */
813
814static void
815roff_man_free1(struct roff_man *man)
816{
817	if (man->meta.first != NULL)
818		roff_node_delete(man, man->meta.first);
819	free(man->meta.msec);
820	free(man->meta.vol);
821	free(man->meta.os);
822	free(man->meta.arch);
823	free(man->meta.title);
824	free(man->meta.name);
825	free(man->meta.date);
826	free(man->meta.sodest);
827}
828
829void
830roff_state_reset(struct roff_man *man)
831{
832	man->last = man->meta.first;
833	man->last_es = NULL;
834	man->flags = 0;
835	man->lastsec = man->lastnamed = SEC_NONE;
836	man->next = ROFF_NEXT_CHILD;
837	roff_setreg(man->roff, "nS", 0, '=');
838}
839
840static void
841roff_man_alloc1(struct roff_man *man)
842{
843	memset(&man->meta, 0, sizeof(man->meta));
844	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
845	man->meta.first->type = ROFFT_ROOT;
846	man->meta.macroset = MACROSET_NONE;
847	roff_state_reset(man);
848}
849
/*
 * Discard the current syntax tree and meta data
 * and start over with an empty tree.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Release the old content before it gets overwritten. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
856
857void
858roff_man_free(struct roff_man *man)
859{
860	roff_man_free1(man);
861	free(man->os_r);
862	free(man);
863}
864
865struct roff_man *
866roff_man_alloc(struct roff *roff, const char *os_s, int quick)
867{
868	struct roff_man *man;
869
870	man = mandoc_calloc(1, sizeof(*man));
871	man->roff = roff;
872	man->os_s = os_s;
873	man->quick = quick;
874	roff_man_alloc1(man);
875	roff->man = man;
876	return man;
877}
878
879/* --- syntax tree handling ----------------------------------------------- */
880
881struct roff_node *
882roff_node_alloc(struct roff_man *man, int line, int pos,
883	enum roff_type type, int tok)
884{
885	struct roff_node	*n;
886
887	n = mandoc_calloc(1, sizeof(*n));
888	n->line = line;
889	n->pos = pos;
890	n->tok = tok;
891	n->type = type;
892	n->sec = man->lastsec;
893
894	if (man->flags & MDOC_SYNOPSIS)
895		n->flags |= NODE_SYNPRETTY;
896	else
897		n->flags &= ~NODE_SYNPRETTY;
898	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
899		n->flags |= NODE_NOFILL;
900	else
901		n->flags &= ~NODE_NOFILL;
902	if (man->flags & MDOC_NEWLINE)
903		n->flags |= NODE_LINE;
904	man->flags &= ~MDOC_NEWLINE;
905
906	return n;
907}
908
909void
910roff_node_append(struct roff_man *man, struct roff_node *n)
911{
912
913	switch (man->next) {
914	case ROFF_NEXT_SIBLING:
915		if (man->last->next != NULL) {
916			n->next = man->last->next;
917			man->last->next->prev = n;
918		} else
919			man->last->parent->last = n;
920		man->last->next = n;
921		n->prev = man->last;
922		n->parent = man->last->parent;
923		break;
924	case ROFF_NEXT_CHILD:
925		if (man->last->child != NULL) {
926			n->next = man->last->child;
927			man->last->child->prev = n;
928		} else
929			man->last->last = n;
930		man->last->child = n;
931		n->parent = man->last;
932		break;
933	default:
934		abort();
935	}
936	man->last = n;
937
938	switch (n->type) {
939	case ROFFT_HEAD:
940		n->parent->head = n;
941		break;
942	case ROFFT_BODY:
943		if (n->end != ENDBODY_NOT)
944			return;
945		n->parent->body = n;
946		break;
947	case ROFFT_TAIL:
948		n->parent->tail = n;
949		break;
950	default:
951		return;
952	}
953
954	/*
955	 * Copy over the normalised-data pointer of our parent.  Not
956	 * everybody has one, but copying a null pointer is fine.
957	 */
958
959	n->norm = n->parent->norm;
960	assert(n->parent->type == ROFFT_BLOCK);
961}
962
963void
964roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
965{
966	struct roff_node	*n;
967
968	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
969	n->string = roff_strdup(man->roff, word);
970	roff_node_append(man, n);
971	n->flags |= NODE_VALID | NODE_ENDED;
972	man->next = ROFF_NEXT_SIBLING;
973}
974
975void
976roff_word_append(struct roff_man *man, const char *word)
977{
978	struct roff_node	*n;
979	char			*addstr, *newstr;
980
981	n = man->last;
982	addstr = roff_strdup(man->roff, word);
983	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
984	free(addstr);
985	free(n->string);
986	n->string = newstr;
987	man->next = ROFF_NEXT_SIBLING;
988}
989
990void
991roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
992{
993	struct roff_node	*n;
994
995	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
996	roff_node_append(man, n);
997	man->next = ROFF_NEXT_CHILD;
998}
999
1000struct roff_node *
1001roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1002{
1003	struct roff_node	*n;
1004
1005	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1006	roff_node_append(man, n);
1007	man->next = ROFF_NEXT_CHILD;
1008	return n;
1009}
1010
1011struct roff_node *
1012roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1013{
1014	struct roff_node	*n;
1015
1016	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1017	roff_node_append(man, n);
1018	man->next = ROFF_NEXT_CHILD;
1019	return n;
1020}
1021
1022struct roff_node *
1023roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1024{
1025	struct roff_node	*n;
1026
1027	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1028	roff_node_append(man, n);
1029	man->next = ROFF_NEXT_CHILD;
1030	return n;
1031}
1032
1033static void
1034roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1035{
1036	struct roff_node	*n;
1037	struct tbl_span		*span;
1038
1039	if (man->meta.macroset == MACROSET_MAN)
1040		man_breakscope(man, ROFF_TS);
1041	while ((span = tbl_span(tbl)) != NULL) {
1042		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1043		n->span = span;
1044		roff_node_append(man, n);
1045		n->flags |= NODE_VALID | NODE_ENDED;
1046		man->next = ROFF_NEXT_SIBLING;
1047	}
1048}
1049
1050void
1051roff_node_unlink(struct roff_man *man, struct roff_node *n)
1052{
1053
1054	/* Adjust siblings. */
1055
1056	if (n->prev)
1057		n->prev->next = n->next;
1058	if (n->next)
1059		n->next->prev = n->prev;
1060
1061	/* Adjust parent. */
1062
1063	if (n->parent != NULL) {
1064		if (n->parent->child == n)
1065			n->parent->child = n->next;
1066		if (n->parent->last == n)
1067			n->parent->last = n->prev;
1068	}
1069
1070	/* Adjust parse point. */
1071
1072	if (man == NULL)
1073		return;
1074	if (man->last == n) {
1075		if (n->prev == NULL) {
1076			man->last = n->parent;
1077			man->next = ROFF_NEXT_CHILD;
1078		} else {
1079			man->last = n->prev;
1080			man->next = ROFF_NEXT_SIBLING;
1081		}
1082	}
1083	if (man->meta.first == n)
1084		man->meta.first = NULL;
1085}
1086
/*
 * Move the node n to the parse point: unlink it from its present
 * place in the tree, clear its sibling links, and append it again
 * where man->last and man->next say after the unlinking.
 */
void
roff_node_relink(struct roff_man *man, struct roff_node *n)
{
	roff_node_unlink(man, n);
	/* roff_node_append() only sets n->next if there is a successor. */
	n->prev = n->next = NULL;
	roff_node_append(man, n);
}
1094
1095void
1096roff_node_free(struct roff_node *n)
1097{
1098
1099	if (n->args != NULL)
1100		mdoc_argv_free(n->args);
1101	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1102		free(n->norm);
1103	eqn_box_free(n->eqn);
1104	free(n->string);
1105	free(n->tag);
1106	free(n);
1107}
1108
1109void
1110roff_node_delete(struct roff_man *man, struct roff_node *n)
1111{
1112
1113	while (n->child != NULL)
1114		roff_node_delete(man, n->child);
1115	roff_node_unlink(man, n);
1116	roff_node_free(n);
1117}
1118
1119int
1120roff_node_transparent(struct roff_node *n)
1121{
1122	if (n == NULL)
1123		return 0;
1124	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1125		return 1;
1126	return roff_tok_transparent(n->tok);
1127}
1128
1129int
1130roff_tok_transparent(enum roff_tok tok)
1131{
1132	switch (tok) {
1133	case ROFF_ft:
1134	case ROFF_ll:
1135	case ROFF_mc:
1136	case ROFF_po:
1137	case ROFF_ta:
1138	case MDOC_Db:
1139	case MDOC_Es:
1140	case MDOC_Sm:
1141	case MDOC_Tg:
1142	case MAN_DT:
1143	case MAN_UC:
1144	case MAN_PD:
1145	case MAN_AT:
1146		return 1;
1147	default:
1148		return 0;
1149	}
1150}
1151
1152struct roff_node *
1153roff_node_child(struct roff_node *n)
1154{
1155	for (n = n->child; roff_node_transparent(n); n = n->next)
1156		continue;
1157	return n;
1158}
1159
1160struct roff_node *
1161roff_node_prev(struct roff_node *n)
1162{
1163	do {
1164		n = n->prev;
1165	} while (roff_node_transparent(n));
1166	return n;
1167}
1168
1169struct roff_node *
1170roff_node_next(struct roff_node *n)
1171{
1172	do {
1173		n = n->next;
1174	} while (roff_node_transparent(n));
1175	return n;
1176}
1177
1178void
1179deroff(char **dest, const struct roff_node *n)
1180{
1181	char	*cp;
1182	size_t	 sz;
1183
1184	if (n->string == NULL) {
1185		for (n = n->child; n != NULL; n = n->next)
1186			deroff(dest, n);
1187		return;
1188	}
1189
1190	/* Skip leading whitespace. */
1191
1192	for (cp = n->string; *cp != '\0'; cp++) {
1193		if (cp[0] == '\\' && cp[1] != '\0' &&
1194		    strchr(" %&0^|~", cp[1]) != NULL)
1195			cp++;
1196		else if ( ! isspace((unsigned char)*cp))
1197			break;
1198	}
1199
1200	/* Skip trailing backslash. */
1201
1202	sz = strlen(cp);
1203	if (sz > 0 && cp[sz - 1] == '\\')
1204		sz--;
1205
1206	/* Skip trailing whitespace. */
1207
1208	for (; sz; sz--)
1209		if ( ! isspace((unsigned char)cp[sz-1]))
1210			break;
1211
1212	/* Skip empty strings. */
1213
1214	if (sz == 0)
1215		return;
1216
1217	if (*dest == NULL) {
1218		*dest = mandoc_strndup(cp, sz);
1219		return;
1220	}
1221
1222	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1223	free(*dest);
1224	*dest = cp;
1225}
1226
1227/* --- main functions of the roff parser ---------------------------------- */
1228
1229/*
1230 * Save comments preceding the title macro, for example in order to
1231 * preserve Copyright and license headers in HTML output,
1232 * provide diagnostics about RCS ids and trailing whitespace in comments,
1233 * then discard comments including preceding whitespace.
1234 * This function also handles input line continuation.
1235 */
1236static int
1237roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1238{
1239	struct roff_node *n;	/* used for header comments */
1240	const char	*start;	/* start of the string to process */
1241	const char	*cp;	/* for RCS id parsing */
1242	char		*stesc;	/* start of an escape sequence ('\\') */
1243	char		*ep;	/* end of comment string */
1244	int		 rcsid;	/* kind of RCS id seen */
1245
1246	for (start = stesc = buf->buf + pos;; stesc++) {
1247		/*
1248		 * XXX Ugly hack: Remove the newline character that
1249		 * mparse_buf_r() appended to mark the end of input
1250		 * if it is not preceded by an escape character.
1251		 */
1252		if (stesc[0] == '\n') {
1253			assert(stesc[1] == '\0');
1254			stesc[0] = '\0';
1255		}
1256
1257		/* The line ends without continuation or comment. */
1258		if (stesc[0] == '\0')
1259			return ROFF_CONT;
1260
1261		/* Unescaped byte: skip it. */
1262		if (stesc[0] != ec)
1263			continue;
1264
1265		/*
1266		 * XXX Ugly hack: Do not attempt to append another line
1267		 * if the function mparse_buf_r() appended a newline
1268		 * character to indicate the end of input.
1269		 */
1270		if (stesc[1] == '\n') {
1271			assert(stesc[2] == '\0');
1272			stesc[0] = '\0';
1273			return ROFF_CONT;
1274		}
1275
1276		/*
1277		 * An escape character at the end of an input line
1278		 * requests line continuation.
1279		 */
1280		if (stesc[1] == '\0') {
1281			stesc[0] = '\0';
1282			return ROFF_IGN | ROFF_APPEND;
1283		}
1284
1285		/* Found a comment: process it. */
1286		if (stesc[1] == '"' || stesc[1] == '#')
1287			break;
1288
1289		/* Escaped escape character: skip them both. */
1290		if (stesc[1] == ec)
1291			stesc++;
1292	}
1293
1294	/* Look for an RCS id in the comment. */
1295
1296	rcsid = 0;
1297	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1298		rcsid = 1 << MANDOC_OS_OPENBSD;
1299		cp += 8;
1300	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1301		rcsid = 1 << MANDOC_OS_NETBSD;
1302		cp += 7;
1303	}
1304	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1305	    strchr(cp, '$') != NULL) {
1306		if (r->man->meta.rcsids & rcsid)
1307			mandoc_msg(MANDOCERR_RCS_REP, ln,
1308			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1309		r->man->meta.rcsids |= rcsid;
1310	}
1311
1312	/* Warn about trailing whitespace at the end of the comment. */
1313
1314	ep = strchr(stesc + 2, '\0') - 1;
1315	if (*ep == '\n')
1316		*ep-- = '\0';
1317	if (*ep == ' ' || *ep == '\t')
1318		mandoc_msg(MANDOCERR_SPACE_EOL,
1319		    ln, (int)(ep - buf->buf), NULL);
1320
1321	/* Save comments preceding the title macro in the syntax tree. */
1322
1323	if (r->options & MPARSE_COMMENT) {
1324		while (*ep == ' ' || *ep == '\t')
1325			ep--;
1326		ep[1] = '\0';
1327		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1328		    ROFFT_COMMENT, TOKEN_NONE);
1329		n->string = mandoc_strdup(stesc + 2);
1330		roff_node_append(r->man, n);
1331		n->flags |= NODE_VALID | NODE_ENDED;
1332		r->man->next = ROFF_NEXT_SIBLING;
1333	}
1334
1335	/* The comment requests line continuation. */
1336
1337	if (stesc[1] == '#') {
1338		*stesc = '\0';
1339		return ROFF_IGN | ROFF_APPEND;
1340	}
1341
1342	/* Discard the comment including preceding whitespace. */
1343
1344	while (stesc > start && stesc[-1] == ' ' &&
1345	    (stesc == start + 1 || stesc[-2] != '\\'))
1346		stesc--;
1347	*stesc = '\0';
1348	return ROFF_CONT;
1349}
1350
1351/*
1352 * In the current line, expand escape sequences that produce parsable
1353 * input text.  Also check the syntax of the remaining escape sequences,
1354 * which typically produce output glyphs or change formatter state.
1355 */
1356static int
1357roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1358{
1359	char		 ubuf[24];	/* buffer to print a number */
1360	struct mctx	*ctx;		/* current macro call context */
1361	const char	*res;		/* the string to be pasted */
1362	const char	*src;		/* source for copying */
1363	char		*dst;		/* destination for copying */
1364	enum mandoc_esc	 subtype;	/* return value from roff_escape */
1365	int		 iesc;		/* index of leading escape char */
1366	int		 inam;		/* index of the escape name */
1367	int		 iarg;		/* index beginning the argument */
1368	int		 iendarg;	/* index right after the argument */
1369	int		 iend;		/* index right after the sequence */
1370	int		 isrc, idst;	/* to reduce \\ and \. in names */
1371	int		 deftype;	/* type of definition to paste */
1372	int		 argi;		/* macro argument index */
1373	int		 quote_args;	/* true for \\$@, false for \\$* */
1374	int		 asz;		/* length of the replacement */
1375	int		 rsz;		/* length of the rest of the string */
1376	int		 npos;		/* position in numeric expression */
1377	int		 expand_count;	/* to avoid infinite loops */
1378
1379	expand_count = 0;
1380	while (buf->buf[pos] != '\0') {
1381
1382		/*
1383		 * Skip plain ASCII characters.
1384		 * If we have a non-standard escape character,
1385		 * escape literal backslashes because all processing in
1386		 * subsequent functions uses the standard escaping rules.
1387		 */
1388
1389		if (buf->buf[pos] != ec) {
1390			if (buf->buf[pos] == '\\') {
1391				roff_expand_patch(buf, pos, "\\e", pos + 1);
1392				pos++;
1393			}
1394			pos++;
1395			continue;
1396		}
1397
1398		/*
1399		 * Parse escape sequences,
1400		 * issue diagnostic messages when appropriate,
1401		 * and skip sequences that do not need expansion.
1402		 * If we have a non-standard escape character, translate
1403		 * it to backslashes and translate backslashes to \e.
1404		 */
1405
1406		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
1407		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
1408			while (pos < iend) {
1409				if (buf->buf[pos] == ec) {
1410					buf->buf[pos] = '\\';
1411					if (pos + 1 < iend)
1412						pos++;
1413				} else if (buf->buf[pos] == '\\') {
1414					roff_expand_patch(buf,
1415					    pos, "\\e", pos + 1);
1416					pos++;
1417					iend++;
1418				}
1419				pos++;
1420			}
1421			continue;
1422		}
1423
1424		/* Reduce \\ and \. in names. */
1425
1426		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
1427			isrc = idst = iarg;
1428			while (isrc < iendarg) {
1429				if (isrc + 1 < iendarg &&
1430				    buf->buf[isrc] == '\\' &&
1431				    (buf->buf[isrc + 1] == '\\' ||
1432				     buf->buf[isrc + 1] == '.'))
1433					isrc++;
1434				buf->buf[idst++] = buf->buf[isrc++];
1435			}
1436			iendarg -= isrc - idst;
1437		}
1438
1439		/* Handle expansion. */
1440
1441		res = NULL;
1442		switch (buf->buf[inam]) {
1443		case '*':
1444			if (iendarg == iarg)
1445				break;
1446			deftype = ROFFDEF_USER | ROFFDEF_PRE;
1447			if ((res = roff_getstrn(r, buf->buf + iarg,
1448			    iendarg - iarg, &deftype)) != NULL)
1449				break;
1450
1451			/*
1452			 * If not overridden,
1453			 * let \*(.T through to the formatters.
1454			 */
1455
1456			if (iendarg - iarg == 2 &&
1457			    buf->buf[iarg] == '.' &&
1458			    buf->buf[iarg + 1] == 'T') {
1459				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
1460				pos = iend;
1461				continue;
1462			}
1463
1464			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
1465			    "%.*s", iendarg - iarg, buf->buf + iarg);
1466			break;
1467
1468		case '$':
1469			if (r->mstackpos < 0) {
1470				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
1471				    "%.*s", iend - iesc, buf->buf + iesc);
1472				break;
1473			}
1474			ctx = r->mstack + r->mstackpos;
1475			argi = buf->buf[iarg] - '1';
1476			if (argi >= 0 && argi <= 8) {
1477				if (argi < ctx->argc)
1478					res = ctx->argv[argi];
1479				break;
1480			}
1481			if (buf->buf[iarg] == '*')
1482				quote_args = 0;
1483			else if (buf->buf[iarg] == '@')
1484				quote_args = 1;
1485			else {
1486				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
1487				    "%.*s", iend - iesc, buf->buf + iesc);
1488				break;
1489			}
1490			asz = 0;
1491			for (argi = 0; argi < ctx->argc; argi++) {
1492				if (argi)
1493					asz++;  /* blank */
1494				if (quote_args)
1495					asz += 2;  /* quotes */
1496				asz += strlen(ctx->argv[argi]);
1497			}
1498			if (asz != iend - iesc) {
1499				rsz = buf->sz - iend;
1500				if (asz < iend - iesc)
1501					memmove(buf->buf + iesc + asz,
1502					    buf->buf + iend, rsz);
1503				buf->sz = iesc + asz + rsz;
1504				buf->buf = mandoc_realloc(buf->buf, buf->sz);
1505				if (asz > iend - iesc)
1506					memmove(buf->buf + iesc + asz,
1507					    buf->buf + iend, rsz);
1508			}
1509			dst = buf->buf + iesc;
1510			for (argi = 0; argi < ctx->argc; argi++) {
1511				if (argi)
1512					*dst++ = ' ';
1513				if (quote_args)
1514					*dst++ = '"';
1515				src = ctx->argv[argi];
1516				while (*src != '\0')
1517					*dst++ = *src++;
1518				if (quote_args)
1519					*dst++ = '"';
1520			}
1521			continue;
1522		case 'A':
1523			ubuf[0] = iendarg > iarg ? '1' : '0';
1524			ubuf[1] = '\0';
1525			res = ubuf;
1526			break;
1527		case 'B':
1528			npos = 0;
1529			ubuf[0] = iendarg > iarg && iend > iendarg &&
1530			    roff_evalnum(r, ln, buf->buf + iarg, &npos,
1531					 NULL, ROFFNUM_SCALE) &&
1532			    npos == iendarg - iarg ? '1' : '0';
1533			ubuf[1] = '\0';
1534			res = ubuf;
1535			break;
1536		case 'V':
1537			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
1538			    "%.*s", iend - iesc, buf->buf + iesc);
1539			roff_expand_patch(buf, iendarg, "}", iend);
1540			roff_expand_patch(buf, iesc, "${", iarg);
1541			continue;
1542		case 'g':
1543			break;
1544		case 'n':
1545			if (iendarg > iarg)
1546				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1547				    roff_getregn(r, buf->buf + iarg,
1548				    iendarg - iarg, buf->buf[inam + 1]));
1549			else
1550				ubuf[0] = '\0';
1551			res = ubuf;
1552			break;
1553		case 'w':
1554			rsz = 0;
1555			subtype = ESCAPE_UNDEF;
1556			while (iarg < iendarg) {
1557				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
1558				if (buf->buf[iarg] != '\\') {
1559					rsz += asz;
1560					iarg++;
1561					continue;
1562				}
1563				switch ((subtype = roff_escape(buf->buf, 0,
1564				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
1565				case ESCAPE_SPECIAL:
1566				case ESCAPE_NUMBERED:
1567				case ESCAPE_UNICODE:
1568				case ESCAPE_OVERSTRIKE:
1569				case ESCAPE_UNDEF:
1570					break;
1571				case ESCAPE_DEVICE:
1572					asz *= 8;
1573					break;
1574				case ESCAPE_EXPAND:
1575					abort();
1576				default:
1577					continue;
1578				}
1579				rsz += asz;
1580			}
1581			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
1582			res = ubuf;
1583			break;
1584		default:
1585			break;
1586		}
1587		if (res == NULL)
1588			res = "";
1589		if (++expand_count > EXPAND_LIMIT ||
1590		    buf->sz + strlen(res) > SHRT_MAX) {
1591			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
1592			return ROFF_IGN;
1593		}
1594		roff_expand_patch(buf, iesc, res, iend);
1595	}
1596	return ROFF_CONT;
1597}
1598
1599/*
1600 * Replace the substring from the start position (inclusive)
1601 * to end position (exclusive) with the repl(acement) string.
1602 */
1603static void
1604roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1605{
1606	char	*nbuf;
1607
1608	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1609	    repl, buf->buf + end) + 1;
1610	free(buf->buf);
1611	buf->buf = nbuf;
1612}
1613
1614/*
1615 * Parse a quoted or unquoted roff-style request or macro argument.
1616 * Return a pointer to the parsed argument, which is either the original
1617 * pointer or advanced by one byte in case the argument is quoted.
1618 * NUL-terminate the argument in place.
1619 * Collapse pairs of quotes inside quoted arguments.
1620 * Advance the argument pointer to the next argument,
1621 * or to the NUL byte terminating the argument line.
1622 */
1623char *
1624roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1625{
1626	struct buf	 buf;
1627	char		*cp, *start;
1628	int		 newesc, pairs, quoted, white;
1629
1630	/* Quoting can only start with a new word. */
1631	start = *cpp;
1632	quoted = 0;
1633	if ('"' == *start) {
1634		quoted = 1;
1635		start++;
1636	}
1637
1638	newesc = pairs = white = 0;
1639	for (cp = start; '\0' != *cp; cp++) {
1640
1641		/*
1642		 * Move the following text left
1643		 * after quoted quotes and after "\\" and "\t".
1644		 */
1645		if (pairs)
1646			cp[-pairs] = cp[0];
1647
1648		if ('\\' == cp[0]) {
1649			/*
1650			 * In copy mode, translate double to single
1651			 * backslashes and backslash-t to literal tabs.
1652			 */
1653			switch (cp[1]) {
1654			case 'a':
1655			case 't':
1656				cp[-pairs] = '\t';
1657				pairs++;
1658				cp++;
1659				break;
1660			case '\\':
1661				cp[-pairs] = '\\';
1662				newesc = 1;
1663				pairs++;
1664				cp++;
1665				break;
1666			case ' ':
1667				/* Skip escaped blanks. */
1668				if (0 == quoted)
1669					cp++;
1670				break;
1671			default:
1672				break;
1673			}
1674		} else if (0 == quoted) {
1675			if (' ' == cp[0]) {
1676				/* Unescaped blanks end unquoted args. */
1677				white = 1;
1678				break;
1679			}
1680		} else if ('"' == cp[0]) {
1681			if ('"' == cp[1]) {
1682				/* Quoted quotes collapse. */
1683				pairs++;
1684				cp++;
1685			} else {
1686				/* Unquoted quotes end quoted args. */
1687				quoted = 2;
1688				break;
1689			}
1690		}
1691	}
1692
1693	/* Quoted argument without a closing quote. */
1694	if (1 == quoted)
1695		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1696
1697	/* NUL-terminate this argument and move to the next one. */
1698	if (pairs)
1699		cp[-pairs] = '\0';
1700	if ('\0' != *cp) {
1701		*cp++ = '\0';
1702		while (' ' == *cp)
1703			cp++;
1704	}
1705	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1706	*cpp = cp;
1707
1708	if ('\0' == *cp && (white || ' ' == cp[-1]))
1709		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1710
1711	start = mandoc_strdup(start);
1712	if (newesc == 0)
1713		return start;
1714
1715	buf.buf = start;
1716	buf.sz = strlen(start) + 1;
1717	buf.next = NULL;
1718	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
1719		free(buf.buf);
1720		buf.buf = mandoc_strdup("");
1721	}
1722	return buf.buf;
1723}
1724
1725
1726/*
1727 * Process text streams.
1728 */
1729static int
1730roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1731{
1732	size_t		 sz;
1733	const char	*start;
1734	char		*p;
1735	int		 isz;
1736	enum mandoc_esc	 esc;
1737
1738	/* Spring the input line trap. */
1739
1740	if (roffit_lines == 1) {
1741		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1742		free(buf->buf);
1743		buf->buf = p;
1744		buf->sz = isz + 1;
1745		*offs = 0;
1746		free(roffit_macro);
1747		roffit_lines = 0;
1748		return ROFF_REPARSE;
1749	} else if (roffit_lines > 1)
1750		--roffit_lines;
1751
1752	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1753		if (roffce_lines < 1) {
1754			r->man->last = roffce_node;
1755			r->man->next = ROFF_NEXT_SIBLING;
1756			roffce_lines = 0;
1757			roffce_node = NULL;
1758		} else
1759			roffce_lines--;
1760	}
1761
1762	/* Convert all breakable hyphens into ASCII_HYPH. */
1763
1764	start = p = buf->buf + pos;
1765
1766	while (*p != '\0') {
1767		sz = strcspn(p, "-\\");
1768		p += sz;
1769
1770		if (*p == '\0')
1771			break;
1772
1773		if (*p == '\\') {
1774			/* Skip over escapes. */
1775			p++;
1776			esc = mandoc_escape((const char **)&p, NULL, NULL);
1777			if (esc == ESCAPE_ERROR)
1778				break;
1779			while (*p == '-')
1780				p++;
1781			continue;
1782		} else if (p == start) {
1783			p++;
1784			continue;
1785		}
1786
1787		if (isalpha((unsigned char)p[-1]) &&
1788		    isalpha((unsigned char)p[1]))
1789			*p = ASCII_HYPH;
1790		p++;
1791	}
1792	return ROFF_CONT;
1793}
1794
/*
 * Process one input line stored in buf, starting at offset *offs.
 * ln is the line number used in messages, and len is the length
 * used for the warning about overlong text lines.
 * Comments and line continuation are handled and escape sequences
 * expanded first.  The line is then handed to whichever applies:
 * the text handler of an open roff block, the equation parser,
 * the table parser, roff_parsetext(), the sub handler of an open
 * roff block, or roff_req_or_macro().
 * Returns the ROFF_* code telling the caller how to proceed,
 * possibly combined with further flag bits.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about overlong text lines, but not inside tables and
	 * equations, not in no-fill mode, not for lines starting with
	 * a blank, dot, backslash, or the control character, and not
	 * if the line has no blank among its first 80 bytes.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos, while ppos keeps the original offset. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the flag bits for merging in below. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything except .EN is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, text lines and empty requests are table input. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	/* Plain text; stop saving comments from now on. */
	if ( ! ctl) {
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/*
	 * A request or macro outside any roff block:
	 * stop saving comments, look up the name, and dispatch.
	 */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1893
1894/*
1895 * Handle a new request or macro.
1896 * May be called outside any scope or from inside a conditional scope.
1897 */
1898static int
1899roff_req_or_macro(ROFF_ARGS) {
1900
1901	/* For now, tables ignore most macros and some request. */
1902
1903	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1904	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1905	    tok == ROFF_sp)) {
1906		mandoc_msg(MANDOCERR_TBLMACRO,
1907		    ln, ppos, "%s", buf->buf + ppos);
1908		if (tok != TOKEN_NONE)
1909			return ROFF_IGN;
1910		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1911			pos++;
1912		while (buf->buf[pos] == ' ')
1913			pos++;
1914		tbl_read(r->tbl, ln, buf->buf, pos);
1915		roff_addtbl(r->man, ln, r->tbl);
1916		return ROFF_IGN;
1917	}
1918
1919	/* For now, let high level macros abort .ce mode. */
1920
1921	if (roffce_node != NULL &&
1922	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1923	     tok == ROFF_TH || tok == ROFF_TS)) {
1924		r->man->last = roffce_node;
1925		r->man->next = ROFF_NEXT_SIBLING;
1926		roffce_lines = 0;
1927		roffce_node = NULL;
1928	}
1929
1930	/*
1931	 * This is neither a roff request nor a user-defined macro.
1932	 * Let the standard macro set parsers handle it.
1933	 */
1934
1935	if (tok == TOKEN_NONE)
1936		return ROFF_CONT;
1937
1938	/* Execute a roff request or a user-defined macro. */
1939
1940	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1941}
1942
1943/*
1944 * Internal interface function to tell the roff parser that execution
1945 * of the current macro ended.  This is required because macro
1946 * definitions usually do not end with a .return request.
1947 */
1948void
1949roff_userret(struct roff *r)
1950{
1951	struct mctx	*ctx;
1952	int		 i;
1953
1954	assert(r->mstackpos >= 0);
1955	ctx = r->mstack + r->mstackpos;
1956	for (i = 0; i < ctx->argc; i++)
1957		free(ctx->argv[i]);
1958	ctx->argc = 0;
1959	r->mstackpos--;
1960}
1961
/*
 * Finish parsing at the end of the input:
 * report a roff block, equation, or table that is still open
 * and close a pending equation and a pending table.
 */
void
roff_endparse(struct roff *r)
{
	/* An open roff block is only reported, not closed. */
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	/* Parse what was collected for an unterminated equation. */
	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	/* End a table that is still open. */
	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}
1981
1982/*
1983 * Parse the request or macro name at buf[*pos].
1984 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1985 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1986 * As a side effect, set r->current_string to the definition or to NULL.
1987 */
1988static enum roff_tok
1989roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1990{
1991	char		*cp;
1992	const char	*mac;
1993	size_t		 maclen;
1994	int		 deftype;
1995	enum roff_tok	 t;
1996
1997	cp = buf + *pos;
1998
1999	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2000		return TOKEN_NONE;
2001
2002	mac = cp;
2003	maclen = roff_getname(r, &cp, ln, ppos);
2004
2005	deftype = ROFFDEF_USER | ROFFDEF_REN;
2006	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2007	switch (deftype) {
2008	case ROFFDEF_USER:
2009		t = ROFF_USERDEF;
2010		break;
2011	case ROFFDEF_REN:
2012		t = ROFF_RENAMED;
2013		break;
2014	default:
2015		t = roffhash_find(r->reqtab, mac, maclen);
2016		break;
2017	}
2018	if (t != TOKEN_NONE)
2019		*pos = cp - buf;
2020	else if (deftype == ROFFDEF_UNDEF) {
2021		/* Using an undefined macro defines it to be empty. */
2022		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2023		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2024	}
2025	return t;
2026}
2027
2028/* --- handling of request blocks ----------------------------------------- */
2029
2030/*
2031 * Close a macro definition block or an "ignore" block.
2032 */
2033static int
2034roff_cblock(ROFF_ARGS)
2035{
2036	int	 rr;
2037
2038	if (r->last == NULL) {
2039		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2040		return ROFF_IGN;
2041	}
2042
2043	switch (r->last->tok) {
2044	case ROFF_am:
2045	case ROFF_ami:
2046	case ROFF_de:
2047	case ROFF_dei:
2048	case ROFF_ig:
2049		break;
2050	case ROFF_am1:
2051	case ROFF_de1:
2052		/* Remapped in roff_block(). */
2053		abort();
2054	default:
2055		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056		return ROFF_IGN;
2057	}
2058
2059	roffnode_pop(r);
2060	roffnode_cleanscope(r);
2061
2062	/*
2063	 * If a conditional block with braces is still open,
2064	 * check for "\}" block end markers.
2065	 */
2066
2067	if (r->last != NULL && r->last->endspan < 0) {
2068		rr = 1;  /* If arguments follow "\}", warn about them. */
2069		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2070	}
2071
2072	if (buf->buf[pos] != '\0')
2073		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2074		    ".. %s", buf->buf + pos);
2075
2076	return ROFF_IGN;
2077}
2078
2079/*
2080 * Pop all nodes ending at the end of the current input line.
2081 * Return the number of loops ended.
2082 */
2083static int
2084roffnode_cleanscope(struct roff *r)
2085{
2086	int inloop;
2087
2088	inloop = 0;
2089	while (r->last != NULL && r->last->endspan > 0) {
2090		if (--r->last->endspan != 0)
2091			break;
2092		inloop += roffnode_pop(r);
2093	}
2094	return inloop;
2095}
2096
2097/*
2098 * Handle the closing "\}" of a conditional block.
2099 * Apart from generating warnings, this only pops nodes.
2100 * Return the number of loops ended.
2101 */
2102static int
2103roff_ccond(struct roff *r, int ln, int ppos)
2104{
2105	if (NULL == r->last) {
2106		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2107		return 0;
2108	}
2109
2110	switch (r->last->tok) {
2111	case ROFF_el:
2112	case ROFF_ie:
2113	case ROFF_if:
2114	case ROFF_while:
2115		break;
2116	default:
2117		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2118		return 0;
2119	}
2120
2121	if (r->last->endspan > -1) {
2122		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123		return 0;
2124	}
2125
2126	return roffnode_pop(r) + roffnode_cleanscope(r);
2127}
2128
2129static int
2130roff_block(ROFF_ARGS)
2131{
2132	const char	*name, *value;
2133	char		*call, *cp, *iname, *rname;
2134	size_t		 csz, namesz, rsz;
2135	int		 deftype;
2136
2137	/* Ignore groff compatibility mode for now. */
2138
2139	if (tok == ROFF_de1)
2140		tok = ROFF_de;
2141	else if (tok == ROFF_dei1)
2142		tok = ROFF_dei;
2143	else if (tok == ROFF_am1)
2144		tok = ROFF_am;
2145	else if (tok == ROFF_ami1)
2146		tok = ROFF_ami;
2147
2148	/* Parse the macro name argument. */
2149
2150	cp = buf->buf + pos;
2151	if (tok == ROFF_ig) {
2152		iname = NULL;
2153		namesz = 0;
2154	} else {
2155		iname = cp;
2156		namesz = roff_getname(r, &cp, ln, ppos);
2157		iname[namesz] = '\0';
2158	}
2159
2160	/* Resolve the macro name argument if it is indirect. */
2161
2162	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2163		deftype = ROFFDEF_USER;
2164		name = roff_getstrn(r, iname, namesz, &deftype);
2165		if (name == NULL) {
2166			mandoc_msg(MANDOCERR_STR_UNDEF,
2167			    ln, (int)(iname - buf->buf),
2168			    "%.*s", (int)namesz, iname);
2169			namesz = 0;
2170		} else
2171			namesz = strlen(name);
2172	} else
2173		name = iname;
2174
2175	if (namesz == 0 && tok != ROFF_ig) {
2176		mandoc_msg(MANDOCERR_REQ_EMPTY,
2177		    ln, ppos, "%s", roff_name[tok]);
2178		return ROFF_IGN;
2179	}
2180
2181	roffnode_push(r, tok, name, ln, ppos);
2182
2183	/*
2184	 * At the beginning of a `de' macro, clear the existing string
2185	 * with the same name, if there is one.  New content will be
2186	 * appended from roff_block_text() in multiline mode.
2187	 */
2188
2189	if (tok == ROFF_de || tok == ROFF_dei) {
2190		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2191		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2192	} else if (tok == ROFF_am || tok == ROFF_ami) {
2193		deftype = ROFFDEF_ANY;
2194		value = roff_getstrn(r, iname, namesz, &deftype);
2195		switch (deftype) {  /* Before appending, ... */
2196		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2197			roff_setstrn(&r->strtab, name, namesz,
2198			    value, strlen(value), 0);
2199			break;
2200		case ROFFDEF_REN: /* call original standard macro. */
2201			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2202			    (int)strlen(value), value);
2203			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2204			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2205			free(call);
2206			break;
2207		case ROFFDEF_STD:  /* rename and call standard macro. */
2208			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2209			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2210			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2211			    (int)rsz, rname);
2212			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2213			free(call);
2214			free(rname);
2215			break;
2216		default:
2217			break;
2218		}
2219	}
2220
2221	if (*cp == '\0')
2222		return ROFF_IGN;
2223
2224	/* Get the custom end marker. */
2225
2226	iname = cp;
2227	namesz = roff_getname(r, &cp, ln, ppos);
2228
2229	/* Resolve the end marker if it is indirect. */
2230
2231	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2232		deftype = ROFFDEF_USER;
2233		name = roff_getstrn(r, iname, namesz, &deftype);
2234		if (name == NULL) {
2235			mandoc_msg(MANDOCERR_STR_UNDEF,
2236			    ln, (int)(iname - buf->buf),
2237			    "%.*s", (int)namesz, iname);
2238			namesz = 0;
2239		} else
2240			namesz = strlen(name);
2241	} else
2242		name = iname;
2243
2244	if (namesz)
2245		r->last->end = mandoc_strndup(name, namesz);
2246
2247	if (*cp != '\0')
2248		mandoc_msg(MANDOCERR_ARG_EXCESS,
2249		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2250
2251	return ROFF_IGN;
2252}
2253
2254static int
2255roff_block_sub(ROFF_ARGS)
2256{
2257	enum roff_tok	t;
2258	int		i, j;
2259
2260	/*
2261	 * If a custom end marker is a user-defined or predefined macro
2262	 * or a request, interpret it.
2263	 */
2264
2265	if (r->last->end) {
2266		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2267			if (buf->buf[i] != r->last->end[j])
2268				break;
2269
2270		if (r->last->end[j] == '\0' &&
2271		    (buf->buf[i] == '\0' ||
2272		     buf->buf[i] == ' ' ||
2273		     buf->buf[i] == '\t')) {
2274			roffnode_pop(r);
2275			roffnode_cleanscope(r);
2276
2277			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2278				i++;
2279
2280			pos = i;
2281			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2282			    TOKEN_NONE)
2283				return ROFF_RERUN;
2284			return ROFF_IGN;
2285		}
2286	}
2287
2288	/* Handle the standard end marker. */
2289
2290	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2291	if (t == ROFF_cblock)
2292		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2293
2294	/* Not an end marker, so append the line to the block. */
2295
2296	if (tok != ROFF_ig)
2297		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2298	return ROFF_IGN;
2299}
2300
2301static int
2302roff_block_text(ROFF_ARGS)
2303{
2304
2305	if (tok != ROFF_ig)
2306		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2307
2308	return ROFF_IGN;
2309}
2310
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * The input line in buf is modified in place.
 * Return ROFF_IGN, combined with ROFF_LOOPCONT or ROFF_LOOPEXIT
 * if tok is ROFF_while and roffnode_cleanscope() or roff_ccond()
 * reported that at least one loop was ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;

	irc = ROFF_IGN;
	/* Sample the rule before any node gets popped below. */
	rr = r->last->rule;
	/* Flag to report if a loop ends: continue if true, else exit. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			if (ep[2] == '\0')
				/* At the end of the line, truncate. */
				ep[0] = '\0';
			else if (rr)
				/* Line is kept: overwrite with "\&". */
				ep[1] = '&';
			else
				/* Line is dropped: cut out the "\}". */
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Backslash at the end: step onto the NUL. */
			++ep;
			break;
		default:
			/* Skip the backslash and the byte after it. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2370
/*
 * Parse and process a request or macro line in conditional scope.
 * Closing "\}" markers are handled first by roff_cond_checkend(),
 * then the request or macro on the line is looked up and, if
 * applicable, executed by roff_req_or_macro().
 * Return the combined ROFF_* flags of both steps.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	int		 irc, rr, spos;
	enum roff_tok	 t;

	rr = 0;  /* If arguments follow "\}", skip them. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	/* Save pos before roff_parse(), which gets a pointer to it. */
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Handle requests and macros if the conditional evaluated
	 * to true or if they are structurally required.
	 * The .break request is always handled specially.
	 */

	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			/* A loop ended on this very line: leave it. */
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Set the rule to false for all enclosing
			 * blocks up to and including the innermost
			 * enclosing .while block.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
		/* If ROFF_WHILE got set, drop the loop end flags. */
		if (irc & ROFF_WHILE)
			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
	}
	return irc;
}
2409
2410/*
2411 * Parse and process a text line in conditional scope.
2412 */
2413static int
2414roff_cond_text(ROFF_ARGS)
2415{
2416	int	 irc, rr;
2417
2418	rr = 1;  /* If arguments follow "\}", preserve them. */
2419	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2420	if (rr)
2421		irc |= ROFF_CONT;
2422	return irc;
2423}
2424
2425/* --- handling of numeric and conditional expressions -------------------- */
2426
2427/*
2428 * Parse a single signed integer number.  Stop at the first non-digit.
2429 * If there is at least one digit, return success and advance the
2430 * parse point, else return failure and let the parse point unchanged.
2431 * Ignore overflows, treat them just like the C language.
2432 */
2433static int
2434roff_getnum(const char *v, int *pos, int *res, int flags)
2435{
2436	int	 myres, scaled, n, p;
2437
2438	if (NULL == res)
2439		res = &myres;
2440
2441	p = *pos;
2442	n = v[p] == '-';
2443	if (n || v[p] == '+')
2444		p++;
2445
2446	if (flags & ROFFNUM_WHITE)
2447		while (isspace((unsigned char)v[p]))
2448			p++;
2449
2450	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2451		*res = 10 * *res + v[p] - '0';
2452	if (p == *pos + n)
2453		return 0;
2454
2455	if (n)
2456		*res = -*res;
2457
2458	/* Each number may be followed by one optional scaling unit. */
2459
2460	switch (v[p]) {
2461	case 'f':
2462		scaled = *res * 65536;
2463		break;
2464	case 'i':
2465		scaled = *res * 240;
2466		break;
2467	case 'c':
2468		scaled = *res * 240 / 2.54;
2469		break;
2470	case 'v':
2471	case 'P':
2472		scaled = *res * 40;
2473		break;
2474	case 'm':
2475	case 'n':
2476		scaled = *res * 24;
2477		break;
2478	case 'p':
2479		scaled = *res * 10 / 3;
2480		break;
2481	case 'u':
2482		scaled = *res;
2483		break;
2484	case 'M':
2485		scaled = *res * 6 / 25;
2486		break;
2487	default:
2488		scaled = *res;
2489		p--;
2490		break;
2491	}
2492	if (flags & ROFFNUM_SCALE)
2493		*res = scaled;
2494
2495	*pos = p + 1;
2496	return 1;
2497}
2498
/*
 * Evaluate a string comparison condition of the form /left/right/
 * where the delimiter is whatever byte is found at v[*pos].
 * Return 1 if the text between the first and second delimiter
 * equals the text between the second and third, or 0 otherwise,
 * including when a delimiter is missing.
 * Move *pos behind the third delimiter, or to the end of the
 * line if there is none.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* opening delimiter */
	lhs = delim + 1;		/* cursor in the left string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Walk both strings in parallel. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Differ: look for the closing one. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings end here. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2541
/*
 * Evaluate an optionally negated single character, numerical,
 * or string condition.
 * The condition starts at v[*pos]; *pos is advanced past the part
 * of the line that was consumed.  A leading '!' inverts the sense.
 * Return non-zero if the (possibly negated) condition holds.
 */
static int
roff_evalcond(struct roff *r, int ln, char *v, int *pos)
{
	const char	*start, *end;
	char		*cp, *name;
	size_t		 sz;
	int		 deftype, len, number, savepos, istrue, wanttrue;

	if ('!' == v[*pos]) {
		wanttrue = 0;
		(*pos)++;
	} else
		wanttrue = 1;

	switch (v[*pos]) {
	case '\0':
		/* An empty condition is false, negated or not. */
		return 0;
	case 'n':
	case 'o':
		/* These single-letter conditions always hold. */
		(*pos)++;
		return wanttrue;
	case 'e':
	case 't':
	case 'v':
		/* These single-letter conditions never hold. */
		(*pos)++;
		return !wanttrue;
	case 'c':
		/* Character availability: skip 'c' and blanks. */
		do {
			(*pos)++;
		} while (v[*pos] == ' ');

		/*
		 * Quirk for groff compatibility:
		 * The horizontal tab is neither available nor unavailable.
		 */

		if (v[*pos] == '\t') {
			(*pos)++;
			return 0;
		}

		/* Printable ASCII characters are available. */

		if (v[*pos] != '\\') {
			(*pos)++;
			return wanttrue;
		}

		/* Let mandoc_escape() classify the escape sequence. */

		end = v + ++*pos;
		switch (mandoc_escape(&end, &start, &len)) {
		case ESCAPE_SPECIAL:
			istrue = mchars_spec2cp(start, len) != -1;
			break;
		case ESCAPE_UNICODE:
			istrue = 1;
			break;
		case ESCAPE_NUMBERED:
			istrue = mchars_num2char(start, len) != -1;
			break;
		default:
			/* Makes the function return 0 either way. */
			istrue = !wanttrue;
			break;
		}
		*pos = end - v;
		return istrue == wanttrue;
	case 'd':
	case 'r':
		/* 'r' tests for a register, 'd' for a string or macro. */
		cp = v + *pos + 1;
		while (*cp == ' ')
			cp++;
		name = cp;
		sz = roff_getname(r, &cp, ln, cp - v);
		if (sz == 0)
			istrue = 0;
		else if (v[*pos] == 'r')
			istrue = roff_hasregn(r, name, sz);
		else {
			/* Only the resulting deftype is of interest. */
			deftype = ROFFDEF_ANY;
		        roff_getstrn(r, name, sz, &deftype);
			istrue = !!deftype;
		}
		*pos = (name + sz) - v;
		return istrue == wanttrue;
	default:
		break;
	}

	/*
	 * Try a numerical condition first.  Only if the numerical
	 * parser fails without moving the cursor, try a string
	 * comparison; if it fails after consuming input,
	 * the condition is false.
	 */

	savepos = *pos;
	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
		return (number > 0) == wanttrue;
	else if (*pos == savepos)
		return roff_evalstrcond(v, pos) == wanttrue;
	else
		return 0;
}
2641
2642static int
2643roff_line_ignore(ROFF_ARGS)
2644{
2645
2646	return ROFF_IGN;
2647}
2648
2649static int
2650roff_insec(ROFF_ARGS)
2651{
2652
2653	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2654	return ROFF_IGN;
2655}
2656
2657static int
2658roff_unsupp(ROFF_ARGS)
2659{
2660
2661	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2662	return ROFF_IGN;
2663}
2664
/*
 * Handle a conditional request (tok is compared against ROFF_el,
 * ROFF_ie, and ROFF_while below; other conditionals take the
 * generic path).
 * Push a new node, evaluate the condition into its rule, determine
 * the scope of the conditional (next line, brace block, or rest of
 * the line), and store the position where the body starts in *offs.
 * Return ROFF_RERUN, combined with ROFF_WHILE for .while.
 */
static int
roff_cond(ROFF_ARGS)
{
	int	 irc;

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = tok == ROFF_el ?
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
	    roff_evalcond(r, ln, buf->buf, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (tok == ROFF_ie) {
		/* Grow the stack in steps of 16 entries when full. */
		if (r->rstackpos + 1 == r->rstacksz) {
			r->rstacksz += 16;
			r->rstack = mandoc_reallocarray(r->rstack,
			    r->rstacksz, sizeof(int));
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * Except that .while does not support next-line scope.
	 */

	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
		/*
		 * roffnode_cleanscope() decrements endspan once per
		 * call and pops the node when it reaches zero.
		 */
		r->last->endspan = 2;
		goto out;
	}

	while (buf->buf[pos] == ' ')
		pos++;

	/* An opening brace requests multiline scope. */

	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
		/* Negative endspan: open until a closing "\}". */
		r->last->endspan = -1;
		pos += 2;
		while (buf->buf[pos] == ' ')
			pos++;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if (buf->buf[pos] == '\0')
		mandoc_msg(MANDOCERR_COND_EMPTY,
		    ln, ppos, "%s", roff_name[tok]);

	r->last->endspan = 1;

out:
	/* Tell the caller where the body of the conditional starts. */
	*offs = pos;
	irc = ROFF_RERUN;
	if (tok == ROFF_while)
		irc |= ROFF_WHILE;
	return irc;
}
2747
2748static int
2749roff_ds(ROFF_ARGS)
2750{
2751	char		*string;
2752	const char	*name;
2753	size_t		 namesz;
2754
2755	/* Ignore groff compatibility mode for now. */
2756
2757	if (tok == ROFF_ds1)
2758		tok = ROFF_ds;
2759	else if (tok == ROFF_as1)
2760		tok = ROFF_as;
2761
2762	/*
2763	 * The first word is the name of the string.
2764	 * If it is empty or terminated by an escape sequence,
2765	 * abort the `ds' request without defining anything.
2766	 */
2767
2768	name = string = buf->buf + pos;
2769	if (*name == '\0')
2770		return ROFF_IGN;
2771
2772	namesz = roff_getname(r, &string, ln, pos);
2773	switch (name[namesz]) {
2774	case '\\':
2775		return ROFF_IGN;
2776	case '\t':
2777		string = buf->buf + pos + namesz;
2778		break;
2779	default:
2780		break;
2781	}
2782
2783	/* Read past the initial double-quote, if any. */
2784	if (*string == '"')
2785		string++;
2786
2787	/* The rest is the value. */
2788	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2789	    ROFF_as == tok);
2790	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2791	return ROFF_IGN;
2792}
2793
/*
 * Parse a single operator, one or two characters long,
 * at v[*pos].  The byte found there is always stored to *res.
 * If it starts an operator, replace *res with the code of that
 * operator, advance *pos past it, and return the code, which is
 * non-zero; two-byte operators use these codes:
 * <= 'l', >= 'g', <> '!', <? 'i', >? 'a', == '='.
 * Otherwise, return 0 and leave *pos unchanged.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	char		 code;
	int		 len;

	cp = v + *pos;
	*res = code = *cp;
	len = 1;

	if (*cp == '<' || *cp == '>') {
		/* Comparison, possibly with a one-byte suffix. */
		if (cp[1] == '=')
			code = *cp == '<' ? 'l' : 'g';
		else if (cp[1] == '?')
			code = *cp == '<' ? 'i' : 'a';
		else if (cp[1] == '>' && *cp == '<')
			code = '!';
		if (code != *cp)
			len = 2;
	} else if (*cp == '=') {
		/* Both "=" and "==" mean equality. */
		if (cp[1] == '=')
			len = 2;
	} else if (*cp == '\0' || strchr("+-*/%&:", *cp) == NULL)
		return 0;

	*res = code;
	*pos += len;
	return code;
}
2857
2858/*
2859 * Evaluate either a parenthesized numeric expression
2860 * or a single signed integer number.
2861 */
2862static int
2863roff_evalpar(struct roff *r, int ln,
2864	const char *v, int *pos, int *res, int flags)
2865{
2866
2867	if ('(' != v[*pos])
2868		return roff_getnum(v, pos, res, flags);
2869
2870	(*pos)++;
2871	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2872		return 0;
2873
2874	/*
2875	 * Omission of the closing parenthesis
2876	 * is an error in validation mode,
2877	 * but ignored in evaluation mode.
2878	 */
2879
2880	if (')' == v[*pos])
2881		(*pos)++;
2882	else if (NULL == res)
2883		return 0;
2884
2885	return 1;
2886}
2887
2888/*
2889 * Evaluate a complete numeric expression.
2890 * Proceed left to right, there is no concept of precedence.
2891 */
2892static int
2893roff_evalnum(struct roff *r, int ln, const char *v,
2894	int *pos, int *res, int flags)
2895{
2896	int		 mypos, operand2;
2897	char		 operator;
2898
2899	if (NULL == pos) {
2900		mypos = 0;
2901		pos = &mypos;
2902	}
2903
2904	if (flags & ROFFNUM_WHITE)
2905		while (isspace((unsigned char)v[*pos]))
2906			(*pos)++;
2907
2908	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2909		return 0;
2910
2911	while (1) {
2912		if (flags & ROFFNUM_WHITE)
2913			while (isspace((unsigned char)v[*pos]))
2914				(*pos)++;
2915
2916		if ( ! roff_getop(v, pos, &operator))
2917			break;
2918
2919		if (flags & ROFFNUM_WHITE)
2920			while (isspace((unsigned char)v[*pos]))
2921				(*pos)++;
2922
2923		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2924			return 0;
2925
2926		if (flags & ROFFNUM_WHITE)
2927			while (isspace((unsigned char)v[*pos]))
2928				(*pos)++;
2929
2930		if (NULL == res)
2931			continue;
2932
2933		switch (operator) {
2934		case '+':
2935			*res += operand2;
2936			break;
2937		case '-':
2938			*res -= operand2;
2939			break;
2940		case '*':
2941			*res *= operand2;
2942			break;
2943		case '/':
2944			if (operand2 == 0) {
2945				mandoc_msg(MANDOCERR_DIVZERO,
2946					ln, *pos, "%s", v);
2947				*res = 0;
2948				break;
2949			}
2950			*res /= operand2;
2951			break;
2952		case '%':
2953			if (operand2 == 0) {
2954				mandoc_msg(MANDOCERR_DIVZERO,
2955					ln, *pos, "%s", v);
2956				*res = 0;
2957				break;
2958			}
2959			*res %= operand2;
2960			break;
2961		case '<':
2962			*res = *res < operand2;
2963			break;
2964		case '>':
2965			*res = *res > operand2;
2966			break;
2967		case 'l':
2968			*res = *res <= operand2;
2969			break;
2970		case 'g':
2971			*res = *res >= operand2;
2972			break;
2973		case '=':
2974			*res = *res == operand2;
2975			break;
2976		case '!':
2977			*res = *res != operand2;
2978			break;
2979		case '&':
2980			*res = *res && operand2;
2981			break;
2982		case ':':
2983			*res = *res || operand2;
2984			break;
2985		case 'i':
2986			if (operand2 < *res)
2987				*res = operand2;
2988			break;
2989		case 'a':
2990			if (operand2 > *res)
2991				*res = operand2;
2992			break;
2993		default:
2994			abort();
2995		}
2996	}
2997	return 1;
2998}
2999
3000/* --- register management ------------------------------------------------ */
3001
/*
 * Set, increment ('+'), or decrement ('-') the number register
 * with the NUL-terminated name, leaving its step unchanged.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3007
3008static void
3009roff_setregn(struct roff *r, const char *name, size_t len,
3010    int val, char sign, int step)
3011{
3012	struct roffreg	*reg;
3013
3014	/* Search for an existing register with the same name. */
3015	reg = r->regtab;
3016
3017	while (reg != NULL && (reg->key.sz != len ||
3018	    strncmp(reg->key.p, name, len) != 0))
3019		reg = reg->next;
3020
3021	if (NULL == reg) {
3022		/* Create a new register. */
3023		reg = mandoc_malloc(sizeof(struct roffreg));
3024		reg->key.p = mandoc_strndup(name, len);
3025		reg->key.sz = len;
3026		reg->val = 0;
3027		reg->step = 0;
3028		reg->next = r->regtab;
3029		r->regtab = reg;
3030	}
3031
3032	if ('+' == sign)
3033		reg->val += val;
3034	else if ('-' == sign)
3035		reg->val -= val;
3036	else
3037		reg->val = val;
3038	if (step != INT_MIN)
3039		reg->step = step;
3040}
3041
3042/*
3043 * Handle some predefined read-only number registers.
3044 * For now, return -1 if the requested register is not predefined;
3045 * in case a predefined read-only register having the value -1
3046 * were to turn up, another special value would have to be chosen.
3047 */
3048static int
3049roff_getregro(const struct roff *r, const char *name)
3050{
3051
3052	switch (*name) {
3053	case '$':  /* Number of arguments of the last macro evaluated. */
3054		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3055	case 'A':  /* ASCII approximation mode is always off. */
3056		return 0;
3057	case 'g':  /* Groff compatibility mode is always on. */
3058		return 1;
3059	case 'H':  /* Fixed horizontal resolution. */
3060		return 24;
3061	case 'j':  /* Always adjust left margin only. */
3062		return 0;
3063	case 'T':  /* Some output device is always defined. */
3064		return 1;
3065	case 'V':  /* Fixed vertical resolution. */
3066		return 40;
3067	default:
3068		return -1;
3069	}
3070}
3071
/*
 * Return the value of the number register with the
 * NUL-terminated name, without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3077
3078static int
3079roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3080{
3081	struct roffreg	*reg;
3082	int		 val;
3083
3084	if ('.' == name[0] && 2 == len) {
3085		val = roff_getregro(r, name + 1);
3086		if (-1 != val)
3087			return val;
3088	}
3089
3090	for (reg = r->regtab; reg; reg = reg->next) {
3091		if (len == reg->key.sz &&
3092		    0 == strncmp(name, reg->key.p, len)) {
3093			switch (sign) {
3094			case '+':
3095				reg->val += reg->step;
3096				break;
3097			case '-':
3098				reg->val -= reg->step;
3099				break;
3100			default:
3101				break;
3102			}
3103			return reg->val;
3104		}
3105	}
3106
3107	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3108	return 0;
3109}
3110
3111static int
3112roff_hasregn(const struct roff *r, const char *name, size_t len)
3113{
3114	struct roffreg	*reg;
3115	int		 val;
3116
3117	if ('.' == name[0] && 2 == len) {
3118		val = roff_getregro(r, name + 1);
3119		if (-1 != val)
3120			return 1;
3121	}
3122
3123	for (reg = r->regtab; reg; reg = reg->next)
3124		if (len == reg->key.sz &&
3125		    0 == strncmp(name, reg->key.p, len))
3126			return 1;
3127
3128	return 0;
3129}
3130
3131static void
3132roff_freereg(struct roffreg *reg)
3133{
3134	struct roffreg	*old_reg;
3135
3136	while (NULL != reg) {
3137		free(reg->key.p);
3138		old_reg = reg;
3139		reg = reg->next;
3140		free(old_reg);
3141	}
3142}
3143
3144static int
3145roff_nr(ROFF_ARGS)
3146{
3147	char		*key, *val, *step;
3148	size_t		 keysz;
3149	int		 iv, is, len;
3150	char		 sign;
3151
3152	key = val = buf->buf + pos;
3153	if (*key == '\0')
3154		return ROFF_IGN;
3155
3156	keysz = roff_getname(r, &val, ln, pos);
3157	if (key[keysz] == '\\' || key[keysz] == '\t')
3158		return ROFF_IGN;
3159
3160	sign = *val;
3161	if (sign == '+' || sign == '-')
3162		val++;
3163
3164	len = 0;
3165	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3166		return ROFF_IGN;
3167
3168	step = val + len;
3169	while (isspace((unsigned char)*step))
3170		step++;
3171	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3172		is = INT_MIN;
3173
3174	roff_setregn(r, key, keysz, iv, sign, is);
3175	return ROFF_IGN;
3176}
3177
3178static int
3179roff_rr(ROFF_ARGS)
3180{
3181	struct roffreg	*reg, **prev;
3182	char		*name, *cp;
3183	size_t		 namesz;
3184
3185	name = cp = buf->buf + pos;
3186	if (*name == '\0')
3187		return ROFF_IGN;
3188	namesz = roff_getname(r, &cp, ln, pos);
3189	name[namesz] = '\0';
3190
3191	prev = &r->regtab;
3192	while (1) {
3193		reg = *prev;
3194		if (reg == NULL || !strcmp(name, reg->key.p))
3195			break;
3196		prev = &reg->next;
3197	}
3198	if (reg != NULL) {
3199		*prev = reg->next;
3200		free(reg->key.p);
3201		free(reg);
3202	}
3203	return ROFF_IGN;
3204}
3205
3206/* --- handler functions for roff requests -------------------------------- */
3207
3208static int
3209roff_rm(ROFF_ARGS)
3210{
3211	const char	 *name;
3212	char		 *cp;
3213	size_t		  namesz;
3214
3215	cp = buf->buf + pos;
3216	while (*cp != '\0') {
3217		name = cp;
3218		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3219		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3220		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3221		if (name[namesz] == '\\' || name[namesz] == '\t')
3222			break;
3223	}
3224	return ROFF_IGN;
3225}
3226
3227static int
3228roff_it(ROFF_ARGS)
3229{
3230	int		 iv;
3231
3232	/* Parse the number of lines. */
3233
3234	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3235		mandoc_msg(MANDOCERR_IT_NONUM,
3236		    ln, ppos, "%s", buf->buf + 1);
3237		return ROFF_IGN;
3238	}
3239
3240	while (isspace((unsigned char)buf->buf[pos]))
3241		pos++;
3242
3243	/*
3244	 * Arm the input line trap.
3245	 * Special-casing "an-trap" is an ugly workaround to cope
3246	 * with DocBook stupidly fiddling with man(7) internals.
3247	 */
3248
3249	roffit_lines = iv;
3250	roffit_macro = mandoc_strdup(iv != 1 ||
3251	    strcmp(buf->buf + pos, "an-trap") ?
3252	    buf->buf + pos : "br");
3253	return ROFF_IGN;
3254}
3255
3256static int
3257roff_Dd(ROFF_ARGS)
3258{
3259	int		 mask;
3260	enum roff_tok	 t, te;
3261
3262	switch (tok) {
3263	case ROFF_Dd:
3264		tok = MDOC_Dd;
3265		te = MDOC_MAX;
3266		if (r->format == 0)
3267			r->format = MPARSE_MDOC;
3268		mask = MPARSE_MDOC | MPARSE_QUICK;
3269		break;
3270	case ROFF_TH:
3271		tok = MAN_TH;
3272		te = MAN_MAX;
3273		if (r->format == 0)
3274			r->format = MPARSE_MAN;
3275		mask = MPARSE_QUICK;
3276		break;
3277	default:
3278		abort();
3279	}
3280	if ((r->options & mask) == 0)
3281		for (t = tok; t < te; t++)
3282			roff_setstr(r, roff_name[t], NULL, 0);
3283	return ROFF_CONT;
3284}
3285
3286static int
3287roff_TE(ROFF_ARGS)
3288{
3289	r->man->flags &= ~ROFF_NONOFILL;
3290	if (r->tbl == NULL) {
3291		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3292		return ROFF_IGN;
3293	}
3294	if (tbl_end(r->tbl, 0) == 0) {
3295		r->tbl = NULL;
3296		free(buf->buf);
3297		buf->buf = mandoc_strdup(".sp");
3298		buf->sz = 4;
3299		*offs = 0;
3300		return ROFF_REPARSE;
3301	}
3302	r->tbl = NULL;
3303	return ROFF_IGN;
3304}
3305
3306static int
3307roff_T_(ROFF_ARGS)
3308{
3309
3310	if (NULL == r->tbl)
3311		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3312	else
3313		tbl_restart(ln, ppos, r->tbl);
3314
3315	return ROFF_IGN;
3316}
3317
/*
 * Handle in-line equation delimiters.
 * Search the input line, starting at pos, for the opening delimiter
 * (outside equations) or the closing delimiter (inside an equation)
 * stored in r->last_eqn.  If none is found, return ROFF_CONT.
 * Otherwise, replace the delimiter with an ".EQ" or ".EN" macro
 * on a line of its own, replace the input buffer with the result,
 * and return ROFF_REPARSE.
 */
static int
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
{
	char		*cp1, *cp2;
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;

	/*
	 * Outside equations, look for an opening delimiter.
	 * If we are inside an equation, we already know it is
	 * in-line, or this function wouldn't have been called;
	 * so look for a closing delimiter.
	 */

	cp1 = buf->buf + pos;
	cp2 = strchr(cp1, r->eqn == NULL ?
	    r->last_eqn->odelim : r->last_eqn->cdelim);
	if (cp2 == NULL)
		return ROFF_CONT;

	/*
	 * Split the line at the delimiter: buf->buf now ends before
	 * it, and cp2 points to the text following it.
	 */

	*cp2++ = '\0';
	bef_pr = bef_nl = aft_nl = aft_pr = "";

	/* Handle preceding text, protecting whitespace. */

	if (*buf->buf != '\0') {
		if (r->eqn == NULL)
			bef_pr = "\\&";
		bef_nl = "\n";
	}

	/*
	 * Prepare replacing the delimiter with an equation macro
	 * and drop leading white space from the equation.
	 */

	if (r->eqn == NULL) {
		while (*cp2 == ' ')
			cp2++;
		mac = ".EQ";
	} else
		mac = ".EN";

	/* Handle following text, protecting whitespace. */

	if (*cp2 != '\0') {
		aft_nl = "\n";
		if (r->eqn != NULL)
			aft_pr = "\\&";
	}

	/*
	 * Do the actual replacement.
	 * Since cp2 points into the old buffer, the old buffer
	 * can only be freed after formatting the new one.
	 * The new size includes the terminating NUL byte.
	 */

	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
	free(buf->buf);
	buf->buf = cp1;

	/* Toggle the in-line state of the eqn subsystem. */

	r->eqn_inline = r->eqn == NULL;
	return ROFF_REPARSE;
}
3383
3384static int
3385roff_EQ(ROFF_ARGS)
3386{
3387	struct roff_node	*n;
3388
3389	if (r->man->meta.macroset == MACROSET_MAN)
3390		man_breakscope(r->man, ROFF_EQ);
3391	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3392	if (ln > r->man->last->line)
3393		n->flags |= NODE_LINE;
3394	n->eqn = eqn_box_new();
3395	roff_node_append(r->man, n);
3396	r->man->next = ROFF_NEXT_SIBLING;
3397
3398	assert(r->eqn == NULL);
3399	if (r->last_eqn == NULL)
3400		r->last_eqn = eqn_alloc();
3401	else
3402		eqn_reset(r->last_eqn);
3403	r->eqn = r->last_eqn;
3404	r->eqn->node = n;
3405
3406	if (buf->buf[pos] != '\0')
3407		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3408		    ".EQ %s", buf->buf + pos);
3409
3410	return ROFF_IGN;
3411}
3412
3413static int
3414roff_EN(ROFF_ARGS)
3415{
3416	if (r->eqn != NULL) {
3417		eqn_parse(r->eqn);
3418		r->eqn = NULL;
3419	} else
3420		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3421	if (buf->buf[pos] != '\0')
3422		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3423		    "EN %s", buf->buf + pos);
3424	return ROFF_IGN;
3425}
3426
3427static int
3428roff_TS(ROFF_ARGS)
3429{
3430	if (r->tbl != NULL) {
3431		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3432		tbl_end(r->tbl, 0);
3433	}
3434	r->man->flags |= ROFF_NONOFILL;
3435	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3436	if (r->last_tbl == NULL)
3437		r->first_tbl = r->tbl;
3438	r->last_tbl = r->tbl;
3439	return ROFF_IGN;
3440}
3441
3442static int
3443roff_noarg(ROFF_ARGS)
3444{
3445	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3446		man_breakscope(r->man, tok);
3447	if (tok == ROFF_brp)
3448		tok = ROFF_br;
3449	roff_elem_alloc(r->man, ln, ppos, tok);
3450	if (buf->buf[pos] != '\0')
3451		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3452		   "%s %s", roff_name[tok], buf->buf + pos);
3453	if (tok == ROFF_nf)
3454		r->man->flags |= ROFF_NOFILL;
3455	else if (tok == ROFF_fi)
3456		r->man->flags &= ~ROFF_NOFILL;
3457	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3458	r->man->next = ROFF_NEXT_SIBLING;
3459	return ROFF_IGN;
3460}
3461
3462static int
3463roff_onearg(ROFF_ARGS)
3464{
3465	struct roff_node	*n;
3466	char			*cp;
3467	int			 npos;
3468
3469	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3470	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3471	     tok == ROFF_ti))
3472		man_breakscope(r->man, tok);
3473
3474	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3475		r->man->last = roffce_node;
3476		r->man->next = ROFF_NEXT_SIBLING;
3477	}
3478
3479	roff_elem_alloc(r->man, ln, ppos, tok);
3480	n = r->man->last;
3481
3482	cp = buf->buf + pos;
3483	if (*cp != '\0') {
3484		while (*cp != '\0' && *cp != ' ')
3485			cp++;
3486		while (*cp == ' ')
3487			*cp++ = '\0';
3488		if (*cp != '\0')
3489			mandoc_msg(MANDOCERR_ARG_EXCESS,
3490			    ln, (int)(cp - buf->buf),
3491			    "%s ... %s", roff_name[tok], cp);
3492		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3493	}
3494
3495	if (tok == ROFF_ce || tok == ROFF_rj) {
3496		if (r->man->last->type == ROFFT_ELEM) {
3497			roff_word_alloc(r->man, ln, pos, "1");
3498			r->man->last->flags |= NODE_NOSRC;
3499		}
3500		npos = 0;
3501		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3502		    &roffce_lines, 0) == 0) {
3503			mandoc_msg(MANDOCERR_CE_NONUM,
3504			    ln, pos, "ce %s", buf->buf + pos);
3505			roffce_lines = 1;
3506		}
3507		if (roffce_lines < 1) {
3508			r->man->last = r->man->last->parent;
3509			roffce_node = NULL;
3510			roffce_lines = 0;
3511		} else
3512			roffce_node = r->man->last->parent;
3513	} else {
3514		n->flags |= NODE_VALID | NODE_ENDED;
3515		r->man->last = n;
3516	}
3517	n->flags |= NODE_LINE;
3518	r->man->next = ROFF_NEXT_SIBLING;
3519	return ROFF_IGN;
3520}
3521
3522static int
3523roff_manyarg(ROFF_ARGS)
3524{
3525	struct roff_node	*n;
3526	char			*sp, *ep;
3527
3528	roff_elem_alloc(r->man, ln, ppos, tok);
3529	n = r->man->last;
3530
3531	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3532		while (*ep != '\0' && *ep != ' ')
3533			ep++;
3534		while (*ep == ' ')
3535			*ep++ = '\0';
3536		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3537	}
3538
3539	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3540	r->man->last = n;
3541	r->man->next = ROFF_NEXT_SIBLING;
3542	return ROFF_IGN;
3543}
3544
3545static int
3546roff_als(ROFF_ARGS)
3547{
3548	char		*oldn, *newn, *end, *value;
3549	size_t		 oldsz, newsz, valsz;
3550
3551	newn = oldn = buf->buf + pos;
3552	if (*newn == '\0')
3553		return ROFF_IGN;
3554
3555	newsz = roff_getname(r, &oldn, ln, pos);
3556	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3557		return ROFF_IGN;
3558
3559	end = oldn;
3560	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3561	if (oldsz == 0)
3562		return ROFF_IGN;
3563
3564	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3565	    (int)oldsz, oldn);
3566	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3567	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3568	free(value);
3569	return ROFF_IGN;
3570}
3571
3572/*
3573 * The .break request only makes sense inside conditionals,
3574 * and that case is already handled in roff_cond_sub().
3575 */
3576static int
3577roff_break(ROFF_ARGS)
3578{
3579	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3580	return ROFF_IGN;
3581}
3582
3583static int
3584roff_cc(ROFF_ARGS)
3585{
3586	const char	*p;
3587
3588	p = buf->buf + pos;
3589
3590	if (*p == '\0' || (r->control = *p++) == '.')
3591		r->control = '\0';
3592
3593	if (*p != '\0')
3594		mandoc_msg(MANDOCERR_ARG_EXCESS,
3595		    ln, p - buf->buf, "cc ... %s", p);
3596
3597	return ROFF_IGN;
3598}
3599
3600static int
3601roff_char(ROFF_ARGS)
3602{
3603	const char	*p, *kp, *vp;
3604	size_t		 ksz, vsz;
3605	int		 font;
3606
3607	/* Parse the character to be replaced. */
3608
3609	kp = buf->buf + pos;
3610	p = kp + 1;
3611	if (*kp == '\0' || (*kp == '\\' &&
3612	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3613	    (*p != ' ' && *p != '\0')) {
3614		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3615		return ROFF_IGN;
3616	}
3617	ksz = p - kp;
3618	while (*p == ' ')
3619		p++;
3620
3621	/*
3622	 * If the replacement string contains a font escape sequence,
3623	 * we have to restore the font at the end.
3624	 */
3625
3626	vp = p;
3627	vsz = strlen(p);
3628	font = 0;
3629	while (*p != '\0') {
3630		if (*p++ != '\\')
3631			continue;
3632		switch (mandoc_escape(&p, NULL, NULL)) {
3633		case ESCAPE_FONT:
3634		case ESCAPE_FONTROMAN:
3635		case ESCAPE_FONTITALIC:
3636		case ESCAPE_FONTBOLD:
3637		case ESCAPE_FONTBI:
3638		case ESCAPE_FONTCR:
3639		case ESCAPE_FONTCB:
3640		case ESCAPE_FONTCI:
3641		case ESCAPE_FONTPREV:
3642			font++;
3643			break;
3644		default:
3645			break;
3646		}
3647	}
3648	if (font > 1)
3649		mandoc_msg(MANDOCERR_CHAR_FONT,
3650		    ln, (int)(vp - buf->buf), "%s", vp);
3651
3652	/*
3653	 * Approximate the effect of .char using the .tr tables.
3654	 * XXX In groff, .char and .tr interact differently.
3655	 */
3656
3657	if (ksz == 1) {
3658		if (r->xtab == NULL)
3659			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3660		assert((unsigned int)*kp < 128);
3661		free(r->xtab[(int)*kp].p);
3662		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3663		    "%s%s", vp, font ? "\fP" : "");
3664	} else {
3665		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3666		if (font)
3667			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3668	}
3669	return ROFF_IGN;
3670}
3671
3672static int
3673roff_ec(ROFF_ARGS)
3674{
3675	const char	*p;
3676
3677	p = buf->buf + pos;
3678	if (*p == '\0')
3679		r->escape = '\\';
3680	else {
3681		r->escape = *p;
3682		if (*++p != '\0')
3683			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3684			    (int)(p - buf->buf), "ec ... %s", p);
3685	}
3686	return ROFF_IGN;
3687}
3688
3689static int
3690roff_eo(ROFF_ARGS)
3691{
3692	r->escape = '\0';
3693	if (buf->buf[pos] != '\0')
3694		mandoc_msg(MANDOCERR_ARG_SKIP,
3695		    ln, pos, "eo %s", buf->buf + pos);
3696	return ROFF_IGN;
3697}
3698
3699static int
3700roff_mc(ROFF_ARGS)
3701{
3702	struct roff_node	*n;
3703	char			*cp;
3704
3705	/* Parse the first argument. */
3706
3707	cp = buf->buf + pos;
3708	if (*cp != '\0')
3709		cp++;
3710	if (buf->buf[pos] == '\\') {
3711		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3712		case ESCAPE_SPECIAL:
3713		case ESCAPE_UNICODE:
3714		case ESCAPE_NUMBERED:
3715			break;
3716		default:
3717			*cp = '\0';
3718			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3719			    "mc %s", buf->buf + pos);
3720			buf->buf[pos] = '\0';
3721			break;
3722		}
3723	}
3724
3725	/* Ignore additional arguments. */
3726
3727	while (*cp == ' ')
3728		*cp++ = '\0';
3729	if (*cp != '\0') {
3730		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3731		    "mc ... %s", cp);
3732		*cp = '\0';
3733	}
3734
3735	/* Create the .mc node. */
3736
3737	roff_elem_alloc(r->man, ln, ppos, tok);
3738	n = r->man->last;
3739	if (buf->buf[pos] != '\0')
3740		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3741	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3742	r->man->last = n;
3743	r->man->next = ROFF_NEXT_SIBLING;
3744	return ROFF_IGN;
3745}
3746
3747static int
3748roff_nop(ROFF_ARGS)
3749{
3750	while (buf->buf[pos] == ' ')
3751		pos++;
3752	*offs = pos;
3753	return ROFF_RERUN;
3754}
3755
3756static int
3757roff_tr(ROFF_ARGS)
3758{
3759	const char	*p, *first, *second;
3760	size_t		 fsz, ssz;
3761
3762	p = buf->buf + pos;
3763
3764	if (*p == '\0') {
3765		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3766		return ROFF_IGN;
3767	}
3768
3769	while (*p != '\0') {
3770		fsz = ssz = 1;
3771
3772		first = p++;
3773		if (*first == '\\') {
3774			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3775				return ROFF_IGN;
3776			fsz = (size_t)(p - first);
3777		}
3778
3779		second = p++;
3780		if (*second == '\\') {
3781			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3782				return ROFF_IGN;
3783			ssz = (size_t)(p - second);
3784		} else if (*second == '\0') {
3785			mandoc_msg(MANDOCERR_TR_ODD, ln,
3786			    (int)(first - buf->buf), "tr %s", first);
3787			second = " ";
3788			p--;
3789		}
3790
3791		if (fsz > 1) {
3792			roff_setstrn(&r->xmbtab, first, fsz,
3793			    second, ssz, 0);
3794			continue;
3795		}
3796
3797		if (r->xtab == NULL)
3798			r->xtab = mandoc_calloc(128,
3799			    sizeof(struct roffstr));
3800
3801		free(r->xtab[(int)*first].p);
3802		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3803		r->xtab[(int)*first].sz = ssz;
3804	}
3805
3806	return ROFF_IGN;
3807}
3808
3809/*
3810 * Implementation of the .return request.
3811 * There is no need to call roff_userret() from here.
3812 * The read module will call that after rewinding the reader stack
3813 * to the place from where the current macro was called.
3814 */
3815static int
3816roff_return(ROFF_ARGS)
3817{
3818	if (r->mstackpos >= 0)
3819		return ROFF_IGN | ROFF_USERRET;
3820
3821	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3822	return ROFF_IGN;
3823}
3824
3825static int
3826roff_rn(ROFF_ARGS)
3827{
3828	const char	*value;
3829	char		*oldn, *newn, *end;
3830	size_t		 oldsz, newsz;
3831	int		 deftype;
3832
3833	oldn = newn = buf->buf + pos;
3834	if (*oldn == '\0')
3835		return ROFF_IGN;
3836
3837	oldsz = roff_getname(r, &newn, ln, pos);
3838	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3839		return ROFF_IGN;
3840
3841	end = newn;
3842	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3843	if (newsz == 0)
3844		return ROFF_IGN;
3845
3846	deftype = ROFFDEF_ANY;
3847	value = roff_getstrn(r, oldn, oldsz, &deftype);
3848	switch (deftype) {
3849	case ROFFDEF_USER:
3850		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3851		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3852		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3853		break;
3854	case ROFFDEF_PRE:
3855		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3856		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3857		break;
3858	case ROFFDEF_REN:
3859		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3860		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3861		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862		break;
3863	case ROFFDEF_STD:
3864		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3865		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3866		break;
3867	default:
3868		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3869		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3870		break;
3871	}
3872	return ROFF_IGN;
3873}
3874
3875static int
3876roff_shift(ROFF_ARGS)
3877{
3878	struct mctx	*ctx;
3879	int		 argpos, levels, i;
3880
3881	argpos = pos;
3882	levels = 1;
3883	if (buf->buf[pos] != '\0' &&
3884	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3885		mandoc_msg(MANDOCERR_CE_NONUM,
3886		    ln, pos, "shift %s", buf->buf + pos);
3887		levels = 1;
3888	}
3889	if (r->mstackpos < 0) {
3890		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3891		return ROFF_IGN;
3892	}
3893	ctx = r->mstack + r->mstackpos;
3894	if (levels > ctx->argc) {
3895		mandoc_msg(MANDOCERR_SHIFT,
3896		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3897		levels = ctx->argc;
3898	}
3899	if (levels < 0) {
3900		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3901		levels = 0;
3902	}
3903	if (levels == 0)
3904		return ROFF_IGN;
3905	for (i = 0; i < levels; i++)
3906		free(ctx->argv[i]);
3907	ctx->argc -= levels;
3908	for (i = 0; i < ctx->argc; i++)
3909		ctx->argv[i] = ctx->argv[i + levels];
3910	return ROFF_IGN;
3911}
3912
3913static int
3914roff_so(ROFF_ARGS)
3915{
3916	char *name, *cp;
3917
3918	name = buf->buf + pos;
3919	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3920
3921	/*
3922	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3923	 * opening anything that's not in our cwd or anything beneath
3924	 * it.  Thus, explicitly disallow traversing up the file-system
3925	 * or using absolute paths.
3926	 */
3927
3928	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3929		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3930		buf->sz = mandoc_asprintf(&cp,
3931		    ".sp\nSee the file %s.\n.sp", name) + 1;
3932		free(buf->buf);
3933		buf->buf = cp;
3934		*offs = 0;
3935		return ROFF_REPARSE;
3936	}
3937
3938	*offs = pos;
3939	return ROFF_SO;
3940}
3941
3942/* --- user defined strings and macros ------------------------------------ */
3943
3944static int
3945roff_userdef(ROFF_ARGS)
3946{
3947	struct mctx	 *ctx;
3948	char		 *arg, *ap, *dst, *src;
3949	size_t		  sz;
3950
3951	/* If the macro is empty, ignore it altogether. */
3952
3953	if (*r->current_string == '\0')
3954		return ROFF_IGN;
3955
3956	/* Initialize a new macro stack context. */
3957
3958	if (++r->mstackpos == r->mstacksz) {
3959		r->mstack = mandoc_recallocarray(r->mstack,
3960		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3961		r->mstacksz += 8;
3962	}
3963	ctx = r->mstack + r->mstackpos;
3964	ctx->argc = 0;
3965
3966	/*
3967	 * Collect pointers to macro argument strings,
3968	 * NUL-terminating them and escaping quotes.
3969	 */
3970
3971	src = buf->buf + pos;
3972	while (*src != '\0') {
3973		if (ctx->argc == ctx->argsz) {
3974			ctx->argsz += 8;
3975			ctx->argv = mandoc_reallocarray(ctx->argv,
3976			    ctx->argsz, sizeof(*ctx->argv));
3977		}
3978		arg = roff_getarg(r, &src, ln, &pos);
3979		sz = 1;  /* For the terminating NUL. */
3980		for (ap = arg; *ap != '\0'; ap++)
3981			sz += *ap == '"' ? 4 : 1;
3982		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3983		for (ap = arg; *ap != '\0'; ap++) {
3984			if (*ap == '"') {
3985				memcpy(dst, "\\(dq", 4);
3986				dst += 4;
3987			} else
3988				*dst++ = *ap;
3989		}
3990		*dst = '\0';
3991		free(arg);
3992	}
3993
3994	/* Replace the macro invocation by the macro definition. */
3995
3996	free(buf->buf);
3997	buf->buf = mandoc_strdup(r->current_string);
3998	buf->sz = strlen(buf->buf) + 1;
3999	*offs = 0;
4000
4001	return buf->buf[buf->sz - 2] == '\n' ?
4002	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4003}
4004
4005/*
4006 * Calling a high-level macro that was renamed with .rn.
4007 * r->current_string has already been set up by roff_parse().
4008 */
4009static int
4010roff_renamed(ROFF_ARGS)
4011{
4012	char	*nbuf;
4013
4014	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4015	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4016	free(buf->buf);
4017	buf->buf = nbuf;
4018	*offs = 0;
4019	return ROFF_CONT;
4020}
4021
4022/*
4023 * Measure the length in bytes of the roff identifier at *cpp
4024 * and advance the pointer to the next word.
4025 */
4026static size_t
4027roff_getname(struct roff *r, char **cpp, int ln, int pos)
4028{
4029	char	 *name, *cp;
4030	int	  namesz, inam, iend;
4031
4032	name = *cpp;
4033	if (*name == '\0')
4034		return 0;
4035
4036	/* Advance cp to the byte after the end of the name. */
4037
4038	cp = name;
4039	namesz = 0;
4040	for (;;) {
4041		if (*cp == '\0')
4042			break;
4043		if (*cp == ' ' || *cp == '\t') {
4044			cp++;
4045			break;
4046		}
4047		if (*cp != '\\') {
4048			if (name + namesz < cp) {
4049				name[namesz] = *cp;
4050				*cp = ' ';
4051			}
4052			namesz++;
4053			cp++;
4054			continue;
4055		}
4056		if (cp[1] == '{' || cp[1] == '}')
4057			break;
4058		if (roff_escape(cp, 0, 0, NULL, &inam,
4059		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4060			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4061			    "%.*s%.*s", namesz, name, iend, cp);
4062			cp += iend;
4063			break;
4064		}
4065
4066		/*
4067		 * In an identifier, \\, \., \G and so on
4068		 * are reduced to \, ., G and so on,
4069		 * vaguely similar to copy mode.
4070		 */
4071
4072		name[namesz++] = cp[inam];
4073		while (iend--) {
4074			if (cp >= name + namesz)
4075				*cp = ' ';
4076			cp++;
4077		}
4078	}
4079
4080	/* Read past spaces. */
4081
4082	while (*cp == ' ')
4083		cp++;
4084
4085	*cpp = cp;
4086	return namesz;
4087}
4088
4089/*
4090 * Store *string into the user-defined string called *name.
4091 * To clear an existing entry, call with (*r, *name, NULL, 0).
4092 * append == 0: replace mode
4093 * append == 1: single-line append mode
4094 * append == 2: multiline append mode, append '\n' after each call
4095 */
4096static void
4097roff_setstr(struct roff *r, const char *name, const char *string,
4098	int append)
4099{
4100	size_t	 namesz;
4101
4102	namesz = strlen(name);
4103	roff_setstrn(&r->strtab, name, namesz, string,
4104	    string ? strlen(string) : 0, append);
4105	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4106}
4107
4108static void
4109roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4110		const char *string, size_t stringsz, int append)
4111{
4112	struct roffkv	*n;
4113	char		*c;
4114	int		 i;
4115	size_t		 oldch, newch;
4116
4117	/* Search for an existing string with the same name. */
4118	n = *r;
4119
4120	while (n && (namesz != n->key.sz ||
4121			strncmp(n->key.p, name, namesz)))
4122		n = n->next;
4123
4124	if (NULL == n) {
4125		/* Create a new string table entry. */
4126		n = mandoc_malloc(sizeof(struct roffkv));
4127		n->key.p = mandoc_strndup(name, namesz);
4128		n->key.sz = namesz;
4129		n->val.p = NULL;
4130		n->val.sz = 0;
4131		n->next = *r;
4132		*r = n;
4133	} else if (0 == append) {
4134		free(n->val.p);
4135		n->val.p = NULL;
4136		n->val.sz = 0;
4137	}
4138
4139	if (NULL == string)
4140		return;
4141
4142	/*
4143	 * One additional byte for the '\n' in multiline mode,
4144	 * and one for the terminating '\0'.
4145	 */
4146	newch = stringsz + (1 < append ? 2u : 1u);
4147
4148	if (NULL == n->val.p) {
4149		n->val.p = mandoc_malloc(newch);
4150		*n->val.p = '\0';
4151		oldch = 0;
4152	} else {
4153		oldch = n->val.sz;
4154		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4155	}
4156
4157	/* Skip existing content in the destination buffer. */
4158	c = n->val.p + (int)oldch;
4159
4160	/* Append new content to the destination buffer. */
4161	i = 0;
4162	while (i < (int)stringsz) {
4163		/*
4164		 * Rudimentary roff copy mode:
4165		 * Handle escaped backslashes.
4166		 */
4167		if ('\\' == string[i] && '\\' == string[i + 1])
4168			i++;
4169		*c++ = string[i++];
4170	}
4171
4172	/* Append terminating bytes. */
4173	if (1 < append)
4174		*c++ = '\n';
4175
4176	*c = '\0';
4177	n->val.sz = (int)(c - n->val.p);
4178}
4179
4180static const char *
4181roff_getstrn(struct roff *r, const char *name, size_t len,
4182    int *deftype)
4183{
4184	const struct roffkv	*n;
4185	int			 found, i;
4186	enum roff_tok		 tok;
4187
4188	found = 0;
4189	for (n = r->strtab; n != NULL; n = n->next) {
4190		if (strncmp(name, n->key.p, len) != 0 ||
4191		    n->key.p[len] != '\0' || n->val.p == NULL)
4192			continue;
4193		if (*deftype & ROFFDEF_USER) {
4194			*deftype = ROFFDEF_USER;
4195			return n->val.p;
4196		} else {
4197			found = 1;
4198			break;
4199		}
4200	}
4201	for (n = r->rentab; n != NULL; n = n->next) {
4202		if (strncmp(name, n->key.p, len) != 0 ||
4203		    n->key.p[len] != '\0' || n->val.p == NULL)
4204			continue;
4205		if (*deftype & ROFFDEF_REN) {
4206			*deftype = ROFFDEF_REN;
4207			return n->val.p;
4208		} else {
4209			found = 1;
4210			break;
4211		}
4212	}
4213	for (i = 0; i < PREDEFS_MAX; i++) {
4214		if (strncmp(name, predefs[i].name, len) != 0 ||
4215		    predefs[i].name[len] != '\0')
4216			continue;
4217		if (*deftype & ROFFDEF_PRE) {
4218			*deftype = ROFFDEF_PRE;
4219			return predefs[i].str;
4220		} else {
4221			found = 1;
4222			break;
4223		}
4224	}
4225	if (r->man->meta.macroset != MACROSET_MAN) {
4226		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4227			if (strncmp(name, roff_name[tok], len) != 0 ||
4228			    roff_name[tok][len] != '\0')
4229				continue;
4230			if (*deftype & ROFFDEF_STD) {
4231				*deftype = ROFFDEF_STD;
4232				return NULL;
4233			} else {
4234				found = 1;
4235				break;
4236			}
4237		}
4238	}
4239	if (r->man->meta.macroset != MACROSET_MDOC) {
4240		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4241			if (strncmp(name, roff_name[tok], len) != 0 ||
4242			    roff_name[tok][len] != '\0')
4243				continue;
4244			if (*deftype & ROFFDEF_STD) {
4245				*deftype = ROFFDEF_STD;
4246				return NULL;
4247			} else {
4248				found = 1;
4249				break;
4250			}
4251		}
4252	}
4253
4254	if (found == 0 && *deftype != ROFFDEF_ANY) {
4255		if (*deftype & ROFFDEF_REN) {
4256			/*
4257			 * This might still be a request,
4258			 * so do not treat it as undefined yet.
4259			 */
4260			*deftype = ROFFDEF_UNDEF;
4261			return NULL;
4262		}
4263
4264		/* Using an undefined string defines it to be empty. */
4265
4266		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4267		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4268	}
4269
4270	*deftype = 0;
4271	return NULL;
4272}
4273
4274static void
4275roff_freestr(struct roffkv *r)
4276{
4277	struct roffkv	 *n, *nn;
4278
4279	for (n = r; n; n = nn) {
4280		free(n->key.p);
4281		free(n->val.p);
4282		nn = n->next;
4283		free(n);
4284	}
4285}
4286
4287/* --- accessors and utility functions ------------------------------------ */
4288
4289/*
4290 * Duplicate an input string, making the appropriate character
4291 * conversations (as stipulated by `tr') along the way.
4292 * Returns a heap-allocated string with all the replacements made.
4293 */
4294char *
4295roff_strdup(const struct roff *r, const char *p)
4296{
4297	const struct roffkv *cp;
4298	char		*res;
4299	const char	*pp;
4300	size_t		 ssz, sz;
4301	enum mandoc_esc	 esc;
4302
4303	if (NULL == r->xmbtab && NULL == r->xtab)
4304		return mandoc_strdup(p);
4305	else if ('\0' == *p)
4306		return mandoc_strdup("");
4307
4308	/*
4309	 * Step through each character looking for term matches
4310	 * (remember that a `tr' can be invoked with an escape, which is
4311	 * a glyph but the escape is multi-character).
4312	 * We only do this if the character hash has been initialised
4313	 * and the string is >0 length.
4314	 */
4315
4316	res = NULL;
4317	ssz = 0;
4318
4319	while ('\0' != *p) {
4320		assert((unsigned int)*p < 128);
4321		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4322			sz = r->xtab[(int)*p].sz;
4323			res = mandoc_realloc(res, ssz + sz + 1);
4324			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4325			ssz += sz;
4326			p++;
4327			continue;
4328		} else if ('\\' != *p) {
4329			res = mandoc_realloc(res, ssz + 2);
4330			res[ssz++] = *p++;
4331			continue;
4332		}
4333
4334		/* Search for term matches. */
4335		for (cp = r->xmbtab; cp; cp = cp->next)
4336			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4337				break;
4338
4339		if (NULL != cp) {
4340			/*
4341			 * A match has been found.
4342			 * Append the match to the array and move
4343			 * forward by its keysize.
4344			 */
4345			res = mandoc_realloc(res,
4346			    ssz + cp->val.sz + 1);
4347			memcpy(res + ssz, cp->val.p, cp->val.sz);
4348			ssz += cp->val.sz;
4349			p += (int)cp->key.sz;
4350			continue;
4351		}
4352
4353		/*
4354		 * Handle escapes carefully: we need to copy
4355		 * over just the escape itself, or else we might
4356		 * do replacements within the escape itself.
4357		 * Make sure to pass along the bogus string.
4358		 */
4359		pp = p++;
4360		esc = mandoc_escape(&p, NULL, NULL);
4361		if (ESCAPE_ERROR == esc) {
4362			sz = strlen(pp);
4363			res = mandoc_realloc(res, ssz + sz + 1);
4364			memcpy(res + ssz, pp, sz);
4365			break;
4366		}
4367		/*
4368		 * We bail out on bad escapes.
4369		 * No need to warn: we already did so when
4370		 * roff_expand() was called.
4371		 */
4372		sz = (int)(p - pp);
4373		res = mandoc_realloc(res, ssz + sz + 1);
4374		memcpy(res + ssz, pp, sz);
4375		ssz += sz;
4376	}
4377
4378	res[(int)ssz] = '\0';
4379	return res;
4380}
4381
4382int
4383roff_getformat(const struct roff *r)
4384{
4385
4386	return r->format;
4387}
4388
4389/*
4390 * Find out whether a line is a macro line or not.
4391 * If it is, adjust the current position and return one; if it isn't,
4392 * return zero and don't change the current position.
4393 * If the control character has been set with `.cc', then let that grain
4394 * precedence.
4395 * This is slightly contrary to groff, where using the non-breaking
4396 * control character when `cc' has been invoked will cause the
4397 * non-breaking macro contents to be printed verbatim.
4398 */
4399int
4400roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4401{
4402	int		pos;
4403
4404	pos = *ppos;
4405
4406	if (r->control != '\0' && cp[pos] == r->control)
4407		pos++;
4408	else if (r->control != '\0')
4409		return 0;
4410	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4411		pos += 2;
4412	else if ('.' == cp[pos] || '\'' == cp[pos])
4413		pos++;
4414	else
4415		return 0;
4416
4417	while (' ' == cp[pos] || '\t' == cp[pos])
4418		pos++;
4419
4420	*ppos = pos;
4421	return 1;
4422}
4423