1/*	$Id: roff.c,v 1.366 2019/07/01 22:56:24 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stddef.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc_aux.h"
32#include "mandoc_ohash.h"
33#include "mandoc.h"
34#include "roff.h"
35#include "mandoc_parse.h"
36#include "libmandoc.h"
37#include "roff_int.h"
38#include "tbl_parse.h"
39#include "eqn_parse.h"
40
/*
 * ASCII_ESC is the marker roff_getarg() passes to roff_expand() to
 * say that an escape sequence came out of copy-in processing and
 * still has to be checked or interpolated.  Nothing else uses it,
 * which is why it lives here and not in a header file.
 */
#define	ASCII_ESC	27
48
/*
 * Upper bound on the number of string expansions done for one
 * input line; it exists to break infinite expansion loops.
 */
#define	EXPAND_LIMIT	1000
51
/*
 * Bit flags describing how a macro or string is defined.
 */
#define	ROFFDEF_USER	(1 << 1)	/* defined by the user */
#define	ROFFDEF_PRE	(1 << 2)	/* predefined */
#define	ROFFDEF_REN	(1 << 3)	/* standard macro that was renamed */
#define	ROFFDEF_STD	(1 << 4)	/* mdoc(7) or man(7) macro */
/* Any of the four kinds of definition listed above. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)	/* not defined in any way */
60
61/* --- data types --------------------------------------------------------- */
62
/*
 * A bare-bones string: a character pointer plus its cached length.
 */
struct	roffstr {
	char	*p;	/* NUL-terminated character data */
	size_t	 sz;	/* cached result of strlen(p) */
};
70
71/*
72 * A key-value roffstr pair as part of a singly-linked list.
73 */
74struct	roffkv {
75	struct roffstr	 key;
76	struct roffstr	 val;
77	struct roffkv	*next; /* next in list */
78};
79
80/*
81 * A single number register as part of a singly-linked list.
82 */
83struct	roffreg {
84	struct roffstr	 key;
85	int		 val;
86	int		 step;
87	struct roffreg	*next;
88};
89
90/*
91 * Association of request and macro names with token IDs.
92 */
93struct	roffreq {
94	enum roff_tok	 tok;
95	char		 name[];
96};
97
/*
 * The processing context of one macro.
 * Nested macro calls need one context each.
 */
struct	mctx {
	char	**argv;
	int	  argc;
	int	  argsz;
};
107
/*
 * The complete state of one roff parser.
 */
struct	roff {
	struct roff_man	*man;		/* mdoc or man parser */

	/* Stacks. */
	struct roffnode	*last;		/* leaf of the block stack */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings and macros */
	struct roffkv	*rentab;	/* renamed strings and macros */
	struct roffkv	*xmbtab;	/* multi-byte translations (`tr') */
	struct roffstr	*xtab;		/* single-byte translations (`tr') */

	const char	*current_string; /* value of last called user macro */

	/* Tables and equations. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* table currently being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */

	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
135
136/*
137 * A macro definition, condition, or ignored block.
138 */
139struct	roffnode {
140	enum roff_tok	 tok; /* type of node */
141	struct roffnode	*parent; /* up one in stack */
142	int		 line; /* parse line */
143	int		 col; /* parse col */
144	char		*name; /* node name, e.g. macro name */
145	char		*end; /* custom end macro of the block */
146	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
147	int		 rule; /* content is: 1=evaluated 0=skipped */
148};
149
/*
 * The parameter list shared by all request handlers:
 *   r     parse context
 *   tok   token of the macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS	 struct roff *r, enum roff_tok tok, \
			 struct buf *buf, int ln, int ppos, \
			 int pos, int *offs

/* Pointer to a request handler. */
typedef	int (*roffproc)(ROFF_ARGS);
159
160struct	roffmac {
161	roffproc	 proc; /* process new macro */
162	roffproc	 text; /* process as child text of macro */
163	roffproc	 sub; /* process as child of macro */
164	int		 flags;
165#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
166};
167
/*
 * A predefined string: the name used in the input
 * and the symbol that replaces it.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/* Expands to one struct predef initializer, trailing comma included. */
#define	PREDEF(n, s) \
	{ (n), (s) },
175
176/* --- function prototypes ------------------------------------------------ */
177
/* Helpers with individual parameter lists. */
static int roffnode_cleanscope(struct roff *);
static int roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum roff_tok,
		const char *, int, int);
static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
static int roff_ccond(struct roff *, int, int);
static int roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *r, int, char *, int *);
static int roff_evalnum(struct roff *, int,
		const char *, int *, int *, int);
static int roff_evalpar(struct roff *, int,
		const char *, int *, int *, int);
static int roff_evalstrcond(const char *, int *);
static int roff_expand(struct roff *, struct buf *, int, int, char);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(struct roff *, const char *, size_t, char);
static int roff_getregro(const struct roff *, const char *name);
static const char *roff_getstrn(struct roff *,
		const char *, size_t, int *);
static int roff_hasregn(const struct roff *, const char *, size_t);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static enum roff_tok roff_parse(struct roff *, char *, int *, int, int);
static int roff_parsetext(struct roff *, struct buf *, int, int *);
static void roff_setregn(struct roff *, const char *,
		size_t, int, char, int);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *,
		size_t, const char *, size_t, int);

/* Request handlers; all of them take ROFF_ARGS. */
static int roff_als(ROFF_ARGS);
static int roff_block(ROFF_ARGS);
static int roff_block_text(ROFF_ARGS);
static int roff_block_sub(ROFF_ARGS);
static int roff_break(ROFF_ARGS);
static int roff_cblock(ROFF_ARGS);
static int roff_cc(ROFF_ARGS);
static int roff_char(ROFF_ARGS);
static int roff_cond(ROFF_ARGS);
static int roff_cond_text(ROFF_ARGS);
static int roff_cond_sub(ROFF_ARGS);
static int roff_ds(ROFF_ARGS);
static int roff_ec(ROFF_ARGS);
static int roff_eo(ROFF_ARGS);
static int roff_insec(ROFF_ARGS);
static int roff_it(ROFF_ARGS);
static int roff_line_ignore(ROFF_ARGS);
static int roff_manyarg(ROFF_ARGS);
static int roff_noarg(ROFF_ARGS);
static int roff_nop(ROFF_ARGS);
static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static int roff_renamed(ROFF_ARGS);
static int roff_return(ROFF_ARGS);
static int roff_rm(ROFF_ARGS);
static int roff_rn(ROFF_ARGS);
static int roff_rr(ROFF_ARGS);
static int roff_shift(ROFF_ARGS);
static int roff_so(ROFF_ARGS);
static int roff_tr(ROFF_ARGS);
static int roff_Dd(ROFF_ARGS);
static int roff_TE(ROFF_ARGS);
static int roff_TS(ROFF_ARGS);
static int roff_EQ(ROFF_ARGS);
static int roff_EN(ROFF_ARGS);
static int roff_T_(ROFF_ARGS);
static int roff_unsupp(ROFF_ARGS);
static int roff_userdef(ROFF_ARGS);
257
258/* --- constant data ------------------------------------------------------ */
259
/* Flags for the numeric parsing helpers. */
#define	ROFFNUM_SCALE	(1 << 0)	/* roff_getnum(): honour scaling */
#define	ROFFNUM_WHITE	(1 << 1)	/* roff_evalnum(): skip whitespace */
262
263const char *__roff_name[MAN_MAX + 1] = {
264	"br",		"ce",		"fi",		"ft",
265	"ll",		"mc",		"nf",
266	"po",		"rj",		"sp",
267	"ta",		"ti",		NULL,
268	"ab",		"ad",		"af",		"aln",
269	"als",		"am",		"am1",		"ami",
270	"ami1",		"as",		"as1",		"asciify",
271	"backtrace",	"bd",		"bleedat",	"blm",
272        "box",		"boxa",		"bp",		"BP",
273	"break",	"breakchar",	"brnl",		"brp",
274	"brpnl",	"c2",		"cc",
275	"cf",		"cflags",	"ch",		"char",
276	"chop",		"class",	"close",	"CL",
277	"color",	"composite",	"continue",	"cp",
278	"cropat",	"cs",		"cu",		"da",
279	"dch",		"Dd",		"de",		"de1",
280	"defcolor",	"dei",		"dei1",		"device",
281	"devicem",	"di",		"do",		"ds",
282	"ds1",		"dwh",		"dt",		"ec",
283	"ecr",		"ecs",		"el",		"em",
284	"EN",		"eo",		"EP",		"EQ",
285	"errprint",	"ev",		"evc",		"ex",
286	"fallback",	"fam",		"fc",		"fchar",
287	"fcolor",	"fdeferlig",	"feature",	"fkern",
288	"fl",		"flig",		"fp",		"fps",
289	"fschar",	"fspacewidth",	"fspecial",	"ftr",
290	"fzoom",	"gcolor",	"hc",		"hcode",
291	"hidechar",	"hla",		"hlm",		"hpf",
292	"hpfa",		"hpfcode",	"hw",		"hy",
293	"hylang",	"hylen",	"hym",		"hypp",
294	"hys",		"ie",		"if",		"ig",
295	"index",	"it",		"itc",		"IX",
296	"kern",		"kernafter",	"kernbefore",	"kernpair",
297	"lc",		"lc_ctype",	"lds",		"length",
298	"letadj",	"lf",		"lg",		"lhang",
299	"linetabs",	"lnr",		"lnrf",		"lpfx",
300	"ls",		"lsm",		"lt",
301	"mediasize",	"minss",	"mk",		"mso",
302	"na",		"ne",		"nh",		"nhychar",
303	"nm",		"nn",		"nop",		"nr",
304	"nrf",		"nroff",	"ns",		"nx",
305	"open",		"opena",	"os",		"output",
306	"padj",		"papersize",	"pc",		"pev",
307	"pi",		"PI",		"pl",		"pm",
308	"pn",		"pnr",		"ps",
309	"psbb",		"pshape",	"pso",		"ptr",
310	"pvs",		"rchar",	"rd",		"recursionlimit",
311	"return",	"rfschar",	"rhang",
312	"rm",		"rn",		"rnn",		"rr",
313	"rs",		"rt",		"schar",	"sentchar",
314	"shc",		"shift",	"sizes",	"so",
315	"spacewidth",	"special",	"spreadwarn",	"ss",
316	"sty",		"substring",	"sv",		"sy",
317	"T&",		"tc",		"TE",
318	"TH",		"tkf",		"tl",
319	"tm",		"tm1",		"tmc",		"tr",
320	"track",	"transchar",	"trf",		"trimat",
321	"trin",		"trnt",		"troff",	"TS",
322	"uf",		"ul",		"unformat",	"unwatch",
323	"unwatchn",	"vpt",		"vs",		"warn",
324	"warnscale",	"watch",	"watchlength",	"watchn",
325	"wh",		"while",	"write",	"writec",
326	"writem",	"xflag",	".",		NULL,
327	NULL,		"text",
328	"Dd",		"Dt",		"Os",		"Sh",
329	"Ss",		"Pp",		"D1",		"Dl",
330	"Bd",		"Ed",		"Bl",		"El",
331	"It",		"Ad",		"An",		"Ap",
332	"Ar",		"Cd",		"Cm",		"Dv",
333	"Er",		"Ev",		"Ex",		"Fa",
334	"Fd",		"Fl",		"Fn",		"Ft",
335	"Ic",		"In",		"Li",		"Nd",
336	"Nm",		"Op",		"Ot",		"Pa",
337	"Rv",		"St",		"Va",		"Vt",
338	"Xr",		"%A",		"%B",		"%D",
339	"%I",		"%J",		"%N",		"%O",
340	"%P",		"%R",		"%T",		"%V",
341	"Ac",		"Ao",		"Aq",		"At",
342	"Bc",		"Bf",		"Bo",		"Bq",
343	"Bsx",		"Bx",		"Db",		"Dc",
344	"Do",		"Dq",		"Ec",		"Ef",
345	"Em",		"Eo",		"Fx",		"Ms",
346	"No",		"Ns",		"Nx",		"Ox",
347	"Pc",		"Pf",		"Po",		"Pq",
348	"Qc",		"Ql",		"Qo",		"Qq",
349	"Re",		"Rs",		"Sc",		"So",
350	"Sq",		"Sm",		"Sx",		"Sy",
351	"Tn",		"Ux",		"Xc",		"Xo",
352	"Fo",		"Fc",		"Oo",		"Oc",
353	"Bk",		"Ek",		"Bt",		"Hf",
354	"Fr",		"Ud",		"Lb",		"Lp",
355	"Lk",		"Mt",		"Brq",		"Bro",
356	"Brc",		"%C",		"Es",		"En",
357	"Dx",		"%Q",		"%U",		"Ta",
358	NULL,
359	"TH",		"SH",		"SS",		"TP",
360	"TQ",
361	"LP",		"PP",		"P",		"IP",
362	"HP",		"SM",		"SB",		"BI",
363	"IB",		"BR",		"RB",		"R",
364	"B",		"I",		"IR",		"RI",
365	"RE",		"RS",		"DT",		"UC",
366	"PD",		"AT",		"in",
367	"SY",		"YS",		"OP",
368	"EX",		"EE",		"UR",
369	"UE",		"MT",		"ME",		NULL
370};
371const	char *const *roff_name = __roff_name;
372
373static	struct roffmac	 roffs[TOKEN_NONE] = {
374	{ roff_noarg, NULL, NULL, 0 },  /* br */
375	{ roff_onearg, NULL, NULL, 0 },  /* ce */
376	{ roff_noarg, NULL, NULL, 0 },  /* fi */
377	{ roff_onearg, NULL, NULL, 0 },  /* ft */
378	{ roff_onearg, NULL, NULL, 0 },  /* ll */
379	{ roff_onearg, NULL, NULL, 0 },  /* mc */
380	{ roff_noarg, NULL, NULL, 0 },  /* nf */
381	{ roff_onearg, NULL, NULL, 0 },  /* po */
382	{ roff_onearg, NULL, NULL, 0 },  /* rj */
383	{ roff_onearg, NULL, NULL, 0 },  /* sp */
384	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
385	{ roff_onearg, NULL, NULL, 0 },  /* ti */
386	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
387	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
388	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
389	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
390	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
391	{ roff_als, NULL, NULL, 0 },  /* als */
392	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
393	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
394	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
395	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
396	{ roff_ds, NULL, NULL, 0 },  /* as */
397	{ roff_ds, NULL, NULL, 0 },  /* as1 */
398	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
399	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
400	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
401	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
402	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
403	{ roff_unsupp, NULL, NULL, 0 },  /* box */
404	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
405	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
406	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
407	{ roff_break, NULL, NULL, 0 },  /* break */
408	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
409	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
410	{ roff_noarg, NULL, NULL, 0 },  /* brp */
411	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
412	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
413	{ roff_cc, NULL, NULL, 0 },  /* cc */
414	{ roff_insec, NULL, NULL, 0 },  /* cf */
415	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
416	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
417	{ roff_char, NULL, NULL, 0 },  /* char */
418	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
419	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
420	{ roff_insec, NULL, NULL, 0 },  /* close */
421	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
422	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
423	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
424	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
425	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
426	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
427	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
428	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
429	{ roff_unsupp, NULL, NULL, 0 },  /* da */
430	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
431	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
432	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
433	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
434	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
435	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
436	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
437	{ roff_unsupp, NULL, NULL, 0 },  /* device */
438	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
439	{ roff_unsupp, NULL, NULL, 0 },  /* di */
440	{ roff_unsupp, NULL, NULL, 0 },  /* do */
441	{ roff_ds, NULL, NULL, 0 },  /* ds */
442	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
443	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
444	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
445	{ roff_ec, NULL, NULL, 0 },  /* ec */
446	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
447	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
448	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
449	{ roff_unsupp, NULL, NULL, 0 },  /* em */
450	{ roff_EN, NULL, NULL, 0 },  /* EN */
451	{ roff_eo, NULL, NULL, 0 },  /* eo */
452	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
453	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
454	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
455	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
456	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
457	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
458	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
459	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
460	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
461	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
462	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
463	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
464	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
465	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
466	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
467	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
468	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
469	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
470	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
471	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
472	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
473	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
474	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
475	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
476	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
477	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
478	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
479	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
480	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
481	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
482	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
483	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
484	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
485	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
486	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
487	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
488	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
489	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
490	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
491	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
492	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
493	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
494	{ roff_unsupp, NULL, NULL, 0 },  /* index */
495	{ roff_it, NULL, NULL, 0 },  /* it */
496	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
497	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
498	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
499	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
500	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
501	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
502	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
503	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
504	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
505	{ roff_unsupp, NULL, NULL, 0 },  /* length */
506	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
507	{ roff_insec, NULL, NULL, 0 },  /* lf */
508	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
509	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
510	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
511	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
512	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
513	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
514	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
515	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
516	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
517	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
518	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
519	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
520	{ roff_insec, NULL, NULL, 0 },  /* mso */
521	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
522	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
523	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
524	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
525	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
526	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
527	{ roff_nop, NULL, NULL, 0 },  /* nop */
528	{ roff_nr, NULL, NULL, 0 },  /* nr */
529	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
530	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
531	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
532	{ roff_insec, NULL, NULL, 0 },  /* nx */
533	{ roff_insec, NULL, NULL, 0 },  /* open */
534	{ roff_insec, NULL, NULL, 0 },  /* opena */
535	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
536	{ roff_unsupp, NULL, NULL, 0 },  /* output */
537	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
538	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
539	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
540	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
541	{ roff_insec, NULL, NULL, 0 },  /* pi */
542	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
543	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
544	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
545	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
546	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
547	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
548	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
549	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
550	{ roff_insec, NULL, NULL, 0 },  /* pso */
551	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
552	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
553	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
554	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
555	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
556	{ roff_return, NULL, NULL, 0 },  /* return */
557	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
558	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
559	{ roff_rm, NULL, NULL, 0 },  /* rm */
560	{ roff_rn, NULL, NULL, 0 },  /* rn */
561	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
562	{ roff_rr, NULL, NULL, 0 },  /* rr */
563	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
564	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
565	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
566	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
567	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
568	{ roff_shift, NULL, NULL, 0 },  /* shift */
569	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
570	{ roff_so, NULL, NULL, 0 },  /* so */
571	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
572	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
573	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
574	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
575	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
576	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
577	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
578	{ roff_insec, NULL, NULL, 0 },  /* sy */
579	{ roff_T_, NULL, NULL, 0 },  /* T& */
580	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
581	{ roff_TE, NULL, NULL, 0 },  /* TE */
582	{ roff_Dd, NULL, NULL, 0 },  /* TH */
583	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
584	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
585	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
586	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
587	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
588	{ roff_tr, NULL, NULL, 0 },  /* tr */
589	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
590	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
591	{ roff_insec, NULL, NULL, 0 },  /* trf */
592	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
593	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
594	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
595	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
596	{ roff_TS, NULL, NULL, 0 },  /* TS */
597	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
598	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
599	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
600	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
601	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
602	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
603	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
604	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
605	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
606	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
607	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
608	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
609	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
610	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
611	{ roff_insec, NULL, NULL, 0 },  /* write */
612	{ roff_insec, NULL, NULL, 0 },  /* writec */
613	{ roff_insec, NULL, NULL, 0 },  /* writem */
614	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
615	{ roff_cblock, NULL, NULL, 0 },  /* . */
616	{ roff_renamed, NULL, NULL, 0 },
617	{ roff_userdef, NULL, NULL, 0 }
618};
619
620/* Array of injected predefined strings. */
621#define	PREDEFS_MAX	 38
622static	const struct predef predefs[PREDEFS_MAX] = {
623#include "predefs.in"
624};
625
/* State kept at file scope rather than inside struct roff. */
static	int	 roffce_lines;	/* input lines still to be centered */
static	struct roff_node *roffce_node;  /* the active request */
static	int	 roffit_lines;  /* lines by which to delay */
static	char	*roffit_macro;  /* NUL-terminated macro line */
630
631
632/* --- request table ------------------------------------------------------ */
633
634struct ohash *
635roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636{
637	struct ohash	*htab;
638	struct roffreq	*req;
639	enum roff_tok	 tok;
640	size_t		 sz;
641	unsigned int	 slot;
642
643	htab = mandoc_malloc(sizeof(*htab));
644	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645
646	for (tok = mintok; tok < maxtok; tok++) {
647		if (roff_name[tok] == NULL)
648			continue;
649		sz = strlen(roff_name[tok]);
650		req = mandoc_malloc(sizeof(*req) + sz + 1);
651		req->tok = tok;
652		memcpy(req->name, roff_name[tok], sz + 1);
653		slot = ohash_qlookup(htab, req->name);
654		ohash_insert(htab, slot, req);
655	}
656	return htab;
657}
658
/*
 * Release a table built by roffhash_alloc(): first every record
 * stored in it, then the table itself.  A NULL table is ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*ent;
	unsigned int	 idx;

	if (htab != NULL) {
		ent = ohash_first(htab, &idx);
		while (ent != NULL) {
			free(ent);
			ent = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
673
674enum roff_tok
675roffhash_find(struct ohash *htab, const char *name, size_t sz)
676{
677	struct roffreq	*req;
678	const char	*end;
679
680	if (sz) {
681		end = name + sz;
682		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683	} else
684		req = ohash_find(htab, ohash_qlookup(htab, name));
685	return req == NULL ? TOKEN_NONE : req->tok;
686}
687
688/* --- stack of request blocks -------------------------------------------- */
689
690/*
691 * Pop the current node off of the stack of roff instructions currently
692 * pending.  Return 1 if it is a loop or 0 otherwise.
693 */
694static int
695roffnode_pop(struct roff *r)
696{
697	struct roffnode	*p;
698	int		 inloop;
699
700	p = r->last;
701	inloop = p->tok == ROFF_while;
702	r->last = p->parent;
703	free(p->name);
704	free(p->end);
705	free(p);
706	return inloop;
707}
708
709/*
710 * Push a roff node onto the instruction stack.  This must later be
711 * removed with roffnode_pop().
712 */
713static void
714roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715		int line, int col)
716{
717	struct roffnode	*p;
718
719	p = mandoc_calloc(1, sizeof(struct roffnode));
720	p->tok = tok;
721	if (name)
722		p->name = mandoc_strdup(name);
723	p->parent = r->last;
724	p->line = line;
725	p->col = col;
726	p->rule = p->parent ? p->parent->rule : 0;
727
728	r->last = p;
729}
730
731/* --- roff parser state data management ---------------------------------- */
732
733static void
734roff_free1(struct roff *r)
735{
736	int		 i;
737
738	tbl_free(r->first_tbl);
739	r->first_tbl = r->last_tbl = r->tbl = NULL;
740
741	eqn_free(r->last_eqn);
742	r->last_eqn = r->eqn = NULL;
743
744	while (r->mstackpos >= 0)
745		roff_userret(r);
746
747	while (r->last)
748		roffnode_pop(r);
749
750	free (r->rstack);
751	r->rstack = NULL;
752	r->rstacksz = 0;
753	r->rstackpos = -1;
754
755	roff_freereg(r->regtab);
756	r->regtab = NULL;
757
758	roff_freestr(r->strtab);
759	roff_freestr(r->rentab);
760	roff_freestr(r->xmbtab);
761	r->strtab = r->rentab = r->xmbtab = NULL;
762
763	if (r->xtab)
764		for (i = 0; i < 128; i++)
765			free(r->xtab[i].p);
766	free(r->xtab);
767	r->xtab = NULL;
768}
769
770void
771roff_reset(struct roff *r)
772{
773	roff_free1(r);
774	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
775	r->control = '\0';
776	r->escape = '\\';
777	roffce_lines = 0;
778	roffce_node = NULL;
779	roffit_lines = 0;
780	roffit_macro = NULL;
781}
782
783void
784roff_free(struct roff *r)
785{
786	int		 i;
787
788	roff_free1(r);
789	for (i = 0; i < r->mstacksz; i++)
790		free(r->mstack[i].argv);
791	free(r->mstack);
792	roffhash_free(r->reqtab);
793	free(r);
794}
795
796struct roff *
797roff_alloc(int options)
798{
799	struct roff	*r;
800
801	r = mandoc_calloc(1, sizeof(struct roff));
802	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
803	r->options = options;
804	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
805	r->mstackpos = -1;
806	r->rstackpos = -1;
807	r->escape = '\\';
808	return r;
809}
810
811/* --- syntax tree state data management ---------------------------------- */
812
813static void
814roff_man_free1(struct roff_man *man)
815{
816	if (man->meta.first != NULL)
817		roff_node_delete(man, man->meta.first);
818	free(man->meta.msec);
819	free(man->meta.vol);
820	free(man->meta.os);
821	free(man->meta.arch);
822	free(man->meta.title);
823	free(man->meta.name);
824	free(man->meta.date);
825	free(man->meta.sodest);
826}
827
828void
829roff_state_reset(struct roff_man *man)
830{
831	man->last = man->meta.first;
832	man->last_es = NULL;
833	man->flags = 0;
834	man->lastsec = man->lastnamed = SEC_NONE;
835	man->next = ROFF_NEXT_CHILD;
836	roff_setreg(man->roff, "nS", 0, '=');
837}
838
839static void
840roff_man_alloc1(struct roff_man *man)
841{
842	memset(&man->meta, 0, sizeof(man->meta));
843	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
844	man->meta.first->type = ROFFT_ROOT;
845	man->meta.macroset = MACROSET_NONE;
846	roff_state_reset(man);
847}
848
/*
 * Throw away the current tree and meta data
 * and start over with empty ones.
 */
void
roff_man_reset(struct roff_man *man)
{

	roff_man_free1(man);
	roff_man_alloc1(man);
}
855
/*
 * Destroy a struct roff_man: first what it owns, then the
 * structure itself.
 */
void
roff_man_free(struct roff_man *man)
{

	roff_man_free1(man);
	free(man);
}
862
863struct roff_man *
864roff_man_alloc(struct roff *roff, const char *os_s, int quick)
865{
866	struct roff_man *man;
867
868	man = mandoc_calloc(1, sizeof(*man));
869	man->roff = roff;
870	man->os_s = os_s;
871	man->quick = quick;
872	roff_man_alloc1(man);
873	roff->man = man;
874	return man;
875}
876
877/* --- syntax tree handling ----------------------------------------------- */
878
879struct roff_node *
880roff_node_alloc(struct roff_man *man, int line, int pos,
881	enum roff_type type, int tok)
882{
883	struct roff_node	*n;
884
885	n = mandoc_calloc(1, sizeof(*n));
886	n->line = line;
887	n->pos = pos;
888	n->tok = tok;
889	n->type = type;
890	n->sec = man->lastsec;
891
892	if (man->flags & MDOC_SYNOPSIS)
893		n->flags |= NODE_SYNPRETTY;
894	else
895		n->flags &= ~NODE_SYNPRETTY;
896	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
897		n->flags |= NODE_NOFILL;
898	else
899		n->flags &= ~NODE_NOFILL;
900	if (man->flags & MDOC_NEWLINE)
901		n->flags |= NODE_LINE;
902	man->flags &= ~MDOC_NEWLINE;
903
904	return n;
905}
906
907void
908roff_node_append(struct roff_man *man, struct roff_node *n)
909{
910
911	switch (man->next) {
912	case ROFF_NEXT_SIBLING:
913		if (man->last->next != NULL) {
914			n->next = man->last->next;
915			man->last->next->prev = n;
916		} else
917			man->last->parent->last = n;
918		man->last->next = n;
919		n->prev = man->last;
920		n->parent = man->last->parent;
921		break;
922	case ROFF_NEXT_CHILD:
923		if (man->last->child != NULL) {
924			n->next = man->last->child;
925			man->last->child->prev = n;
926		} else
927			man->last->last = n;
928		man->last->child = n;
929		n->parent = man->last;
930		break;
931	default:
932		abort();
933	}
934	man->last = n;
935
936	switch (n->type) {
937	case ROFFT_HEAD:
938		n->parent->head = n;
939		break;
940	case ROFFT_BODY:
941		if (n->end != ENDBODY_NOT)
942			return;
943		n->parent->body = n;
944		break;
945	case ROFFT_TAIL:
946		n->parent->tail = n;
947		break;
948	default:
949		return;
950	}
951
952	/*
953	 * Copy over the normalised-data pointer of our parent.  Not
954	 * everybody has one, but copying a null pointer is fine.
955	 */
956
957	n->norm = n->parent->norm;
958	assert(n->parent->type == ROFFT_BLOCK);
959}
960
961void
962roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
963{
964	struct roff_node	*n;
965
966	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
967	n->string = roff_strdup(man->roff, word);
968	roff_node_append(man, n);
969	n->flags |= NODE_VALID | NODE_ENDED;
970	man->next = ROFF_NEXT_SIBLING;
971}
972
973void
974roff_word_append(struct roff_man *man, const char *word)
975{
976	struct roff_node	*n;
977	char			*addstr, *newstr;
978
979	n = man->last;
980	addstr = roff_strdup(man->roff, word);
981	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
982	free(addstr);
983	free(n->string);
984	n->string = newstr;
985	man->next = ROFF_NEXT_SIBLING;
986}
987
988void
989roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
990{
991	struct roff_node	*n;
992
993	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
994	roff_node_append(man, n);
995	man->next = ROFF_NEXT_CHILD;
996}
997
998struct roff_node *
999roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1000{
1001	struct roff_node	*n;
1002
1003	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1004	roff_node_append(man, n);
1005	man->next = ROFF_NEXT_CHILD;
1006	return n;
1007}
1008
1009struct roff_node *
1010roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1011{
1012	struct roff_node	*n;
1013
1014	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1015	roff_node_append(man, n);
1016	man->next = ROFF_NEXT_CHILD;
1017	return n;
1018}
1019
1020struct roff_node *
1021roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1022{
1023	struct roff_node	*n;
1024
1025	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1026	roff_node_append(man, n);
1027	man->next = ROFF_NEXT_CHILD;
1028	return n;
1029}
1030
1031static void
1032roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1033{
1034	struct roff_node	*n;
1035	struct tbl_span		*span;
1036
1037	if (man->meta.macroset == MACROSET_MAN)
1038		man_breakscope(man, ROFF_TS);
1039	while ((span = tbl_span(tbl)) != NULL) {
1040		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1041		n->span = span;
1042		roff_node_append(man, n);
1043		n->flags |= NODE_VALID | NODE_ENDED;
1044		man->next = ROFF_NEXT_SIBLING;
1045	}
1046}
1047
1048void
1049roff_node_unlink(struct roff_man *man, struct roff_node *n)
1050{
1051
1052	/* Adjust siblings. */
1053
1054	if (n->prev)
1055		n->prev->next = n->next;
1056	if (n->next)
1057		n->next->prev = n->prev;
1058
1059	/* Adjust parent. */
1060
1061	if (n->parent != NULL) {
1062		if (n->parent->child == n)
1063			n->parent->child = n->next;
1064		if (n->parent->last == n)
1065			n->parent->last = n->prev;
1066	}
1067
1068	/* Adjust parse point. */
1069
1070	if (man == NULL)
1071		return;
1072	if (man->last == n) {
1073		if (n->prev == NULL) {
1074			man->last = n->parent;
1075			man->next = ROFF_NEXT_CHILD;
1076		} else {
1077			man->last = n->prev;
1078			man->next = ROFF_NEXT_SIBLING;
1079		}
1080	}
1081	if (man->meta.first == n)
1082		man->meta.first = NULL;
1083}
1084
1085void
1086roff_node_relink(struct roff_man *man, struct roff_node *n)
1087{
1088	roff_node_unlink(man, n);
1089	n->prev = n->next = NULL;
1090	roff_node_append(man, n);
1091}
1092
1093void
1094roff_node_free(struct roff_node *n)
1095{
1096
1097	if (n->args != NULL)
1098		mdoc_argv_free(n->args);
1099	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1100		free(n->norm);
1101	eqn_box_free(n->eqn);
1102	free(n->string);
1103	free(n);
1104}
1105
1106void
1107roff_node_delete(struct roff_man *man, struct roff_node *n)
1108{
1109
1110	while (n->child != NULL)
1111		roff_node_delete(man, n->child);
1112	roff_node_unlink(man, n);
1113	roff_node_free(n);
1114}
1115
1116void
1117deroff(char **dest, const struct roff_node *n)
1118{
1119	char	*cp;
1120	size_t	 sz;
1121
1122	if (n->type != ROFFT_TEXT) {
1123		for (n = n->child; n != NULL; n = n->next)
1124			deroff(dest, n);
1125		return;
1126	}
1127
1128	/* Skip leading whitespace. */
1129
1130	for (cp = n->string; *cp != '\0'; cp++) {
1131		if (cp[0] == '\\' && cp[1] != '\0' &&
1132		    strchr(" %&0^|~", cp[1]) != NULL)
1133			cp++;
1134		else if ( ! isspace((unsigned char)*cp))
1135			break;
1136	}
1137
1138	/* Skip trailing backslash. */
1139
1140	sz = strlen(cp);
1141	if (sz > 0 && cp[sz - 1] == '\\')
1142		sz--;
1143
1144	/* Skip trailing whitespace. */
1145
1146	for (; sz; sz--)
1147		if ( ! isspace((unsigned char)cp[sz-1]))
1148			break;
1149
1150	/* Skip empty strings. */
1151
1152	if (sz == 0)
1153		return;
1154
1155	if (*dest == NULL) {
1156		*dest = mandoc_strndup(cp, sz);
1157		return;
1158	}
1159
1160	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1161	free(*dest);
1162	*dest = cp;
1163}
1164
1165/* --- main functions of the roff parser ---------------------------------- */
1166
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The text processed is buf->buf starting at offset pos, and newesc
 * is the escape character searched for.  While pasting replacements,
 * buf->buf may be reallocated and buf->sz is updated accordingly.
 *
 * Return values:
 * ROFF_CONT: processing of the line completed.
 * ROFF_IGN | ROFF_APPEND: the line ended in a "\#" comment, or in a
 *   lone escape character without a newline having been seen.
 * ROFF_IGN: more than EXPAND_LIMIT expansions were needed, or the
 *   buffer size plus the replacement length would exceed SHRT_MAX.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			/* Complain if an id of this kind was seen before. */
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * From here on, stesc points to the escape character
		 * and ep to the last byte of the comment.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with the blanks
		 * preceding them, but keep a blank that follows
		 * a backslash.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/*
	 * Walk backwards from the end of the line to its start,
	 * handling one escape sequence at a time.
	 */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * An even distance means an even number of escape
		 * characters in a row: rewrite them all as backslashes
		 * and move on.  Otherwise, stesc starts a sequence,
		 * unless it is the very last character of the line.
		 */

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate only. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character term.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}

			/*
			 * Inside \w, an escape sequence is counted
			 * once for the classes listed below
			 * and not at all otherwise.
			 */

			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments require a macro being executed. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* \$1 to \$9: a single argument, or nothing. */

			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}

			/* Measure all arguments joined by blanks. */

			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}

			/*
			 * Resize the buffer such that the three bytes
			 * of the escape sequence become asz bytes:
			 * when shrinking, move the rest of the line
			 * before the reallocation, when growing, after.
			 */

			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}

			/*
			 * Write the arguments into the gap.  This leaves
			 * stesc behind the pasted text, so the backward
			 * scan continues with the arguments themselves.
			 */

			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield "1" if the complete argument evaluates
			 * as a numeric expression, "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		/*
		 * Undefined replacements expand to the empty string;
		 * defined ones must not make the buffer grow too large.
		 */

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Continue at the end of the pasted string such that
		 * escape sequences it contains get processed, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1579
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument is NUL-terminated in place in the input buffer,
 * skipping the opening quote if there is one, and collapsing
 * pairs of quotes inside quoted arguments.
 * Return a newly allocated copy of the parsed argument, made with
 * mandoc_strdup(); if the argument contained a double backslash,
 * the copy is additionally passed through roff_expand(), and an
 * empty string is returned if that reports ROFF_IGN.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the same distance.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * pairs counts the bytes dropped so far, i.e. the distance
	 * by which the remaining text has to be moved to the left.
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the spot with ASCII_ESC such that
				 * roff_expand() below can find it.
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Blanks after the last argument on the line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* Hand out a copy; the input buffer itself is not returned. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand the escape sequences marked with ASCII_ESC above. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1690
1691
1692/*
1693 * Process text streams.
1694 */
1695static int
1696roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1697{
1698	size_t		 sz;
1699	const char	*start;
1700	char		*p;
1701	int		 isz;
1702	enum mandoc_esc	 esc;
1703
1704	/* Spring the input line trap. */
1705
1706	if (roffit_lines == 1) {
1707		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1708		free(buf->buf);
1709		buf->buf = p;
1710		buf->sz = isz + 1;
1711		*offs = 0;
1712		free(roffit_macro);
1713		roffit_lines = 0;
1714		return ROFF_REPARSE;
1715	} else if (roffit_lines > 1)
1716		--roffit_lines;
1717
1718	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1719		if (roffce_lines < 1) {
1720			r->man->last = roffce_node;
1721			r->man->next = ROFF_NEXT_SIBLING;
1722			roffce_lines = 0;
1723			roffce_node = NULL;
1724		} else
1725			roffce_lines--;
1726	}
1727
1728	/* Convert all breakable hyphens into ASCII_HYPH. */
1729
1730	start = p = buf->buf + pos;
1731
1732	while (*p != '\0') {
1733		sz = strcspn(p, "-\\");
1734		p += sz;
1735
1736		if (*p == '\0')
1737			break;
1738
1739		if (*p == '\\') {
1740			/* Skip over escapes. */
1741			p++;
1742			esc = mandoc_escape((const char **)&p, NULL, NULL);
1743			if (esc == ESCAPE_ERROR)
1744				break;
1745			while (*p == '-')
1746				p++;
1747			continue;
1748		} else if (p == start) {
1749			p++;
1750			continue;
1751		}
1752
1753		if (isalpha((unsigned char)p[-1]) &&
1754		    isalpha((unsigned char)p[1]))
1755			*p = ASCII_HYPH;
1756		p++;
1757	}
1758	return ROFF_CONT;
1759}
1760
/*
 * Parse one input line with the roff parser.
 * The line is buf->buf starting at offset *offs, and ln is its
 * line number used for messages.  buf may be replaced while
 * escape sequences are expanded.
 *
 * After handling equation delimiters and expanding escapes, the
 * line is dispatched, in this order, to the text handler of the
 * open roff block, the equation parser, the table parser, the
 * text processing, the sub-handler of the open roff block, or
 * the handler of the request or user-defined macro found.
 * Returns the ROFF_* code from whichever of these handled the
 * line, and ROFF_CONT for macro lines that are neither requests
 * nor user-defined macros.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This moves pos beyond the control character, if any. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK for later. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;

	/* Inside an equation, every line except ".EN" is equation input. */

	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}

	/* Inside a table, text lines and empty macro lines are table input. */

	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;

		/*
		 * Skip the unknown macro name and the blanks following
		 * it, and treat the rest of the line as table input.
		 */

		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1885
1886/*
1887 * Internal interface function to tell the roff parser that execution
1888 * of the current macro ended.  This is required because macro
1889 * definitions usually do not end with a .return request.
1890 */
1891void
1892roff_userret(struct roff *r)
1893{
1894	struct mctx	*ctx;
1895	int		 i;
1896
1897	assert(r->mstackpos >= 0);
1898	ctx = r->mstack + r->mstackpos;
1899	for (i = 0; i < ctx->argc; i++)
1900		free(ctx->argv[i]);
1901	ctx->argc = 0;
1902	r->mstackpos--;
1903}
1904
1905void
1906roff_endparse(struct roff *r)
1907{
1908	if (r->last != NULL)
1909		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1910		    r->last->col, "%s", roff_name[r->last->tok]);
1911
1912	if (r->eqn != NULL) {
1913		mandoc_msg(MANDOCERR_BLK_NOEND,
1914		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1915		eqn_parse(r->eqn);
1916		r->eqn = NULL;
1917	}
1918
1919	if (r->tbl != NULL) {
1920		tbl_end(r->tbl, 1);
1921		r->tbl = NULL;
1922	}
1923}
1924
1925/*
1926 * Parse a roff node's type from the input buffer.  This must be in the
1927 * form of ".foo xxx" in the usual way.
1928 */
1929static enum roff_tok
1930roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1931{
1932	char		*cp;
1933	const char	*mac;
1934	size_t		 maclen;
1935	int		 deftype;
1936	enum roff_tok	 t;
1937
1938	cp = buf + *pos;
1939
1940	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1941		return TOKEN_NONE;
1942
1943	mac = cp;
1944	maclen = roff_getname(r, &cp, ln, ppos);
1945
1946	deftype = ROFFDEF_USER | ROFFDEF_REN;
1947	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1948	switch (deftype) {
1949	case ROFFDEF_USER:
1950		t = ROFF_USERDEF;
1951		break;
1952	case ROFFDEF_REN:
1953		t = ROFF_RENAMED;
1954		break;
1955	default:
1956		t = roffhash_find(r->reqtab, mac, maclen);
1957		break;
1958	}
1959	if (t != TOKEN_NONE)
1960		*pos = cp - buf;
1961	else if (deftype == ROFFDEF_UNDEF) {
1962		/* Using an undefined macro defines it to be empty. */
1963		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1964		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1965	}
1966	return t;
1967}
1968
1969/* --- handling of request blocks ----------------------------------------- */
1970
1971static int
1972roff_cblock(ROFF_ARGS)
1973{
1974
1975	/*
1976	 * A block-close `..' should only be invoked as a child of an
1977	 * ignore macro, otherwise raise a warning and just ignore it.
1978	 */
1979
1980	if (r->last == NULL) {
1981		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1982		return ROFF_IGN;
1983	}
1984
1985	switch (r->last->tok) {
1986	case ROFF_am:
1987		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1988	case ROFF_ami:
1989	case ROFF_de:
1990		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1991	case ROFF_dei:
1992	case ROFF_ig:
1993		break;
1994	default:
1995		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1996		return ROFF_IGN;
1997	}
1998
1999	if (buf->buf[pos] != '\0')
2000		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2001		    ".. %s", buf->buf + pos);
2002
2003	roffnode_pop(r);
2004	roffnode_cleanscope(r);
2005	return ROFF_IGN;
2006
2007}
2008
2009/*
2010 * Pop all nodes ending at the end of the current input line.
2011 * Return the number of loops ended.
2012 */
2013static int
2014roffnode_cleanscope(struct roff *r)
2015{
2016	int inloop;
2017
2018	inloop = 0;
2019	while (r->last != NULL) {
2020		if (--r->last->endspan != 0)
2021			break;
2022		inloop += roffnode_pop(r);
2023	}
2024	return inloop;
2025}
2026
2027/*
2028 * Handle the closing \} of a conditional block.
2029 * Apart from generating warnings, this only pops nodes.
2030 * Return the number of loops ended.
2031 */
2032static int
2033roff_ccond(struct roff *r, int ln, int ppos)
2034{
2035	if (NULL == r->last) {
2036		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2037		return 0;
2038	}
2039
2040	switch (r->last->tok) {
2041	case ROFF_el:
2042	case ROFF_ie:
2043	case ROFF_if:
2044	case ROFF_while:
2045		break;
2046	default:
2047		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2048		return 0;
2049	}
2050
2051	if (r->last->endspan > -1) {
2052		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2053		return 0;
2054	}
2055
2056	return roffnode_pop(r) + roffnode_cleanscope(r);
2057}
2058
2059static int
2060roff_block(ROFF_ARGS)
2061{
2062	const char	*name, *value;
2063	char		*call, *cp, *iname, *rname;
2064	size_t		 csz, namesz, rsz;
2065	int		 deftype;
2066
2067	/* Ignore groff compatibility mode for now. */
2068
2069	if (tok == ROFF_de1)
2070		tok = ROFF_de;
2071	else if (tok == ROFF_dei1)
2072		tok = ROFF_dei;
2073	else if (tok == ROFF_am1)
2074		tok = ROFF_am;
2075	else if (tok == ROFF_ami1)
2076		tok = ROFF_ami;
2077
2078	/* Parse the macro name argument. */
2079
2080	cp = buf->buf + pos;
2081	if (tok == ROFF_ig) {
2082		iname = NULL;
2083		namesz = 0;
2084	} else {
2085		iname = cp;
2086		namesz = roff_getname(r, &cp, ln, ppos);
2087		iname[namesz] = '\0';
2088	}
2089
2090	/* Resolve the macro name argument if it is indirect. */
2091
2092	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2093		deftype = ROFFDEF_USER;
2094		name = roff_getstrn(r, iname, namesz, &deftype);
2095		if (name == NULL) {
2096			mandoc_msg(MANDOCERR_STR_UNDEF,
2097			    ln, (int)(iname - buf->buf),
2098			    "%.*s", (int)namesz, iname);
2099			namesz = 0;
2100		} else
2101			namesz = strlen(name);
2102	} else
2103		name = iname;
2104
2105	if (namesz == 0 && tok != ROFF_ig) {
2106		mandoc_msg(MANDOCERR_REQ_EMPTY,
2107		    ln, ppos, "%s", roff_name[tok]);
2108		return ROFF_IGN;
2109	}
2110
2111	roffnode_push(r, tok, name, ln, ppos);
2112
2113	/*
2114	 * At the beginning of a `de' macro, clear the existing string
2115	 * with the same name, if there is one.  New content will be
2116	 * appended from roff_block_text() in multiline mode.
2117	 */
2118
2119	if (tok == ROFF_de || tok == ROFF_dei) {
2120		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2121		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2122	} else if (tok == ROFF_am || tok == ROFF_ami) {
2123		deftype = ROFFDEF_ANY;
2124		value = roff_getstrn(r, iname, namesz, &deftype);
2125		switch (deftype) {  /* Before appending, ... */
2126		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2127			roff_setstrn(&r->strtab, name, namesz,
2128			    value, strlen(value), 0);
2129			break;
2130		case ROFFDEF_REN: /* call original standard macro. */
2131			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2132			    (int)strlen(value), value);
2133			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2134			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2135			free(call);
2136			break;
2137		case ROFFDEF_STD:  /* rename and call standard macro. */
2138			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2139			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2140			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2141			    (int)rsz, rname);
2142			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2143			free(call);
2144			free(rname);
2145			break;
2146		default:
2147			break;
2148		}
2149	}
2150
2151	if (*cp == '\0')
2152		return ROFF_IGN;
2153
2154	/* Get the custom end marker. */
2155
2156	iname = cp;
2157	namesz = roff_getname(r, &cp, ln, ppos);
2158
2159	/* Resolve the end marker if it is indirect. */
2160
2161	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2162		deftype = ROFFDEF_USER;
2163		name = roff_getstrn(r, iname, namesz, &deftype);
2164		if (name == NULL) {
2165			mandoc_msg(MANDOCERR_STR_UNDEF,
2166			    ln, (int)(iname - buf->buf),
2167			    "%.*s", (int)namesz, iname);
2168			namesz = 0;
2169		} else
2170			namesz = strlen(name);
2171	} else
2172		name = iname;
2173
2174	if (namesz)
2175		r->last->end = mandoc_strndup(name, namesz);
2176
2177	if (*cp != '\0')
2178		mandoc_msg(MANDOCERR_ARG_EXCESS,
2179		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2180
2181	return ROFF_IGN;
2182}
2183
2184static int
2185roff_block_sub(ROFF_ARGS)
2186{
2187	enum roff_tok	t;
2188	int		i, j;
2189
2190	/*
2191	 * First check whether a custom macro exists at this level.  If
2192	 * it does, then check against it.  This is some of groff's
2193	 * stranger behaviours.  If we encountered a custom end-scope
2194	 * tag and that tag also happens to be a "real" macro, then we
2195	 * need to try interpreting it again as a real macro.  If it's
2196	 * not, then return ignore.  Else continue.
2197	 */
2198
2199	if (r->last->end) {
2200		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2201			if (buf->buf[i] != r->last->end[j])
2202				break;
2203
2204		if (r->last->end[j] == '\0' &&
2205		    (buf->buf[i] == '\0' ||
2206		     buf->buf[i] == ' ' ||
2207		     buf->buf[i] == '\t')) {
2208			roffnode_pop(r);
2209			roffnode_cleanscope(r);
2210
2211			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2212				i++;
2213
2214			pos = i;
2215			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2216			    TOKEN_NONE)
2217				return ROFF_RERUN;
2218			return ROFF_IGN;
2219		}
2220	}
2221
2222	/*
2223	 * If we have no custom end-query or lookup failed, then try
2224	 * pulling it out of the hashtable.
2225	 */
2226
2227	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2228
2229	if (t != ROFF_cblock) {
2230		if (tok != ROFF_ig)
2231			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2232		return ROFF_IGN;
2233	}
2234
2235	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2236}
2237
2238static int
2239roff_block_text(ROFF_ARGS)
2240{
2241
2242	if (tok != ROFF_ig)
2243		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2244
2245	return ROFF_IGN;
2246}
2247
/*
 * Handle a macro line found while a conditional block is open.
 * The rule of the innermost open node decides whether the line is
 * to be interpreted.  Every `\}' on the line is removed from the
 * buffer and passed to roff_ccond().  When tok is ROFF_while,
 * ROFF_LOOPCONT (rule true) or ROFF_LOOPEXIT (rule false) is
 * merged into the return value whenever roffnode_cleanscope() or
 * roff_ccond() return non-zero.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	char		*ep;
	int		 endloop, irc, rr;
	enum roff_tok	 t;

	irc = ROFF_IGN;
	/* Save the rule before roffnode_cleanscope() can change r->last. */
	rr = r->last->rule;
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If `\}' occurs on a macro line without a preceding macro,
	 * drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}')
		rr = 0;

	/*
	 * The closing delimiter `\}' rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			/* Cut `\}' out of the buffer; ep stays in place. */
			memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the terminator. */
			++ep;
			break;
		default:
			/* Skip the backslash and the escaped character. */
			ep += 2;
			break;
		}
	}

	/*
	 * Fully handle known macros when they are structurally
	 * required or when the conditional evaluated to true.
	 */

	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	if (t == ROFF_break) {
		/*
		 * If a loop flag was already set above, force
		 * ROFF_LOOPEXIT.  Otherwise, if the rule is true,
		 * set the rule to false on every open node up to
		 * and including the nearest enclosing `while'.
		 */
		if (irc & ROFF_LOOPMASK)
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (t != TOKEN_NONE &&
	    (rr || roffs[t].flags & ROFFMAC_STRUCT))
		irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
	else
		irc |= rr ? ROFF_CONT : ROFF_IGN;
	return irc;
}
2316
2317static int
2318roff_cond_text(ROFF_ARGS)
2319{
2320	char		*ep;
2321	int		 endloop, irc, rr;
2322
2323	irc = ROFF_IGN;
2324	rr = r->last->rule;
2325	endloop = tok != ROFF_while ? ROFF_IGN :
2326	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2327	if (roffnode_cleanscope(r))
2328		irc |= endloop;
2329
2330	/*
2331	 * If `\}' occurs on a text line with neither preceding
2332	 * nor following characters, drop the line completely.
2333	 */
2334
2335	ep = buf->buf + pos;
2336	if (strcmp(ep, "\\}") == 0)
2337		rr = 0;
2338
2339	/*
2340	 * The closing delimiter `\}' rewinds the conditional scope
2341	 * but is otherwise ignored when interpreting the line.
2342	 */
2343
2344	while ((ep = strchr(ep, '\\')) != NULL) {
2345		switch (ep[1]) {
2346		case '}':
2347			memmove(ep, ep + 2, strlen(ep + 2) + 1);
2348			if (roff_ccond(r, ln, ep - buf->buf))
2349				irc |= endloop;
2350			break;
2351		case '\0':
2352			++ep;
2353			break;
2354		default:
2355			ep += 2;
2356			break;
2357		}
2358	}
2359	if (rr)
2360		irc |= ROFF_CONT;
2361	return irc;
2362}
2363
2364/* --- handling of numeric and conditional expressions -------------------- */
2365
2366/*
2367 * Parse a single signed integer number.  Stop at the first non-digit.
2368 * If there is at least one digit, return success and advance the
2369 * parse point, else return failure and let the parse point unchanged.
2370 * Ignore overflows, treat them just like the C language.
2371 */
2372static int
2373roff_getnum(const char *v, int *pos, int *res, int flags)
2374{
2375	int	 myres, scaled, n, p;
2376
2377	if (NULL == res)
2378		res = &myres;
2379
2380	p = *pos;
2381	n = v[p] == '-';
2382	if (n || v[p] == '+')
2383		p++;
2384
2385	if (flags & ROFFNUM_WHITE)
2386		while (isspace((unsigned char)v[p]))
2387			p++;
2388
2389	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2390		*res = 10 * *res + v[p] - '0';
2391	if (p == *pos + n)
2392		return 0;
2393
2394	if (n)
2395		*res = -*res;
2396
2397	/* Each number may be followed by one optional scaling unit. */
2398
2399	switch (v[p]) {
2400	case 'f':
2401		scaled = *res * 65536;
2402		break;
2403	case 'i':
2404		scaled = *res * 240;
2405		break;
2406	case 'c':
2407		scaled = *res * 240 / 2.54;
2408		break;
2409	case 'v':
2410	case 'P':
2411		scaled = *res * 40;
2412		break;
2413	case 'm':
2414	case 'n':
2415		scaled = *res * 24;
2416		break;
2417	case 'p':
2418		scaled = *res * 10 / 3;
2419		break;
2420	case 'u':
2421		scaled = *res;
2422		break;
2423	case 'M':
2424		scaled = *res * 6 / 25;
2425		break;
2426	default:
2427		scaled = *res;
2428		p--;
2429		break;
2430	}
2431	if (flags & ROFFNUM_SCALE)
2432		*res = scaled;
2433
2434	*pos = p + 1;
2435	return 1;
2436}
2437
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * Advance the parse point after the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs, *stop;
	char		 delim;
	int		 equal;

	equal = 0;
	delim = v[*pos];
	lhs = v + *pos + 1;		/* scans the first string */
	rhs = strchr(lhs, delim);	/* scans the second string */

	if (rhs != NULL) {
		for (;;) {
			rhs++;
			if (*rhs == '\0')	/* final delimiter missing */
				break;
			if (*lhs != *rhs) {	/* mismatch */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {	/* both strings ended */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		stop = strchr(lhs, '\0');
	else if (*rhs != '\0')
		stop = rhs + 1;
	else
		stop = rhs;

	*pos = stop - v;
	return equal;
}
2480
2481/*
2482 * Evaluate an optionally negated single character, numerical,
2483 * or string condition.
2484 */
2485static int
2486roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2487{
2488	const char	*start, *end;
2489	char		*cp, *name;
2490	size_t		 sz;
2491	int		 deftype, len, number, savepos, istrue, wanttrue;
2492
2493	if ('!' == v[*pos]) {
2494		wanttrue = 0;
2495		(*pos)++;
2496	} else
2497		wanttrue = 1;
2498
2499	switch (v[*pos]) {
2500	case '\0':
2501		return 0;
2502	case 'n':
2503	case 'o':
2504		(*pos)++;
2505		return wanttrue;
2506	case 'e':
2507	case 't':
2508	case 'v':
2509		(*pos)++;
2510		return !wanttrue;
2511	case 'c':
2512		do {
2513			(*pos)++;
2514		} while (v[*pos] == ' ');
2515
2516		/*
2517		 * Quirk for groff compatibility:
2518		 * The horizontal tab is neither available nor unavailable.
2519		 */
2520
2521		if (v[*pos] == '\t') {
2522			(*pos)++;
2523			return 0;
2524		}
2525
2526		/* Printable ASCII characters are available. */
2527
2528		if (v[*pos] != '\\') {
2529			(*pos)++;
2530			return wanttrue;
2531		}
2532
2533		end = v + ++*pos;
2534		switch (mandoc_escape(&end, &start, &len)) {
2535		case ESCAPE_SPECIAL:
2536			istrue = mchars_spec2cp(start, len) != -1;
2537			break;
2538		case ESCAPE_UNICODE:
2539			istrue = 1;
2540			break;
2541		case ESCAPE_NUMBERED:
2542			istrue = mchars_num2char(start, len) != -1;
2543			break;
2544		default:
2545			istrue = !wanttrue;
2546			break;
2547		}
2548		*pos = end - v;
2549		return istrue == wanttrue;
2550	case 'd':
2551	case 'r':
2552		cp = v + *pos + 1;
2553		while (*cp == ' ')
2554			cp++;
2555		name = cp;
2556		sz = roff_getname(r, &cp, ln, cp - v);
2557		if (sz == 0)
2558			istrue = 0;
2559		else if (v[*pos] == 'r')
2560			istrue = roff_hasregn(r, name, sz);
2561		else {
2562			deftype = ROFFDEF_ANY;
2563		        roff_getstrn(r, name, sz, &deftype);
2564			istrue = !!deftype;
2565		}
2566		*pos = (name + sz) - v;
2567		return istrue == wanttrue;
2568	default:
2569		break;
2570	}
2571
2572	savepos = *pos;
2573	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2574		return (number > 0) == wanttrue;
2575	else if (*pos == savepos)
2576		return roff_evalstrcond(v, pos) == wanttrue;
2577	else
2578		return 0;
2579}
2580
/*
 * Request handler that discards the input line
 * without any further processing.
 */
static int
roff_line_ignore(ROFF_ARGS)
{

	return ROFF_IGN;
}
2587
/*
 * Request handler that reports MANDOCERR_REQ_INSEC
 * for the request name and discards the input line.
 */
static int
roff_insec(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2595
/*
 * Request handler that reports MANDOCERR_REQ_UNSUPP
 * for the request name and discards the input line.
 */
static int
roff_unsupp(ROFF_ARGS)
{

	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
	return ROFF_IGN;
}
2603
2604static int
2605roff_cond(ROFF_ARGS)
2606{
2607	int	 irc;
2608
2609	roffnode_push(r, tok, NULL, ln, ppos);
2610
2611	/*
2612	 * An `.el' has no conditional body: it will consume the value
2613	 * of the current rstack entry set in prior `ie' calls or
2614	 * defaults to DENY.
2615	 *
2616	 * If we're not an `el', however, then evaluate the conditional.
2617	 */
2618
2619	r->last->rule = tok == ROFF_el ?
2620	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2621	    roff_evalcond(r, ln, buf->buf, &pos);
2622
2623	/*
2624	 * An if-else will put the NEGATION of the current evaluated
2625	 * conditional into the stack of rules.
2626	 */
2627
2628	if (tok == ROFF_ie) {
2629		if (r->rstackpos + 1 == r->rstacksz) {
2630			r->rstacksz += 16;
2631			r->rstack = mandoc_reallocarray(r->rstack,
2632			    r->rstacksz, sizeof(int));
2633		}
2634		r->rstack[++r->rstackpos] = !r->last->rule;
2635	}
2636
2637	/* If the parent has false as its rule, then so do we. */
2638
2639	if (r->last->parent && !r->last->parent->rule)
2640		r->last->rule = 0;
2641
2642	/*
2643	 * Determine scope.
2644	 * If there is nothing on the line after the conditional,
2645	 * not even whitespace, use next-line scope.
2646	 * Except that .while does not support next-line scope.
2647	 */
2648
2649	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2650		r->last->endspan = 2;
2651		goto out;
2652	}
2653
2654	while (buf->buf[pos] == ' ')
2655		pos++;
2656
2657	/* An opening brace requests multiline scope. */
2658
2659	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2660		r->last->endspan = -1;
2661		pos += 2;
2662		while (buf->buf[pos] == ' ')
2663			pos++;
2664		goto out;
2665	}
2666
2667	/*
2668	 * Anything else following the conditional causes
2669	 * single-line scope.  Warn if the scope contains
2670	 * nothing but trailing whitespace.
2671	 */
2672
2673	if (buf->buf[pos] == '\0')
2674		mandoc_msg(MANDOCERR_COND_EMPTY,
2675		    ln, ppos, "%s", roff_name[tok]);
2676
2677	r->last->endspan = 1;
2678
2679out:
2680	*offs = pos;
2681	irc = ROFF_RERUN;
2682	if (tok == ROFF_while)
2683		irc |= ROFF_WHILE;
2684	return irc;
2685}
2686
2687static int
2688roff_ds(ROFF_ARGS)
2689{
2690	char		*string;
2691	const char	*name;
2692	size_t		 namesz;
2693
2694	/* Ignore groff compatibility mode for now. */
2695
2696	if (tok == ROFF_ds1)
2697		tok = ROFF_ds;
2698	else if (tok == ROFF_as1)
2699		tok = ROFF_as;
2700
2701	/*
2702	 * The first word is the name of the string.
2703	 * If it is empty or terminated by an escape sequence,
2704	 * abort the `ds' request without defining anything.
2705	 */
2706
2707	name = string = buf->buf + pos;
2708	if (*name == '\0')
2709		return ROFF_IGN;
2710
2711	namesz = roff_getname(r, &string, ln, pos);
2712	switch (name[namesz]) {
2713	case '\\':
2714		return ROFF_IGN;
2715	case '\t':
2716		string = buf->buf + pos + namesz;
2717		break;
2718	default:
2719		break;
2720	}
2721
2722	/* Read past the initial double-quote, if any. */
2723	if (*string == '"')
2724		string++;
2725
2726	/* The rest is the value. */
2727	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2728	    ROFF_as == tok);
2729	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2730	return ROFF_IGN;
2731}
2732
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then holds the unrecognized character.
 * The two-character operators are encoded as follows:
 * <= as `l', <> as `!', <? as `i', >= as `g', >? as `a',
 * and == as `='.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 next, two;

	two = '\0';
	*res = v[*pos];

	switch (*res) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		next = v[*pos + 1];
		two = next == '=' ? 'l' : next == '>' ? '!' :
		    next == '?' ? 'i' : '\0';
		break;
	case '>':
		next = v[*pos + 1];
		two = next == '=' ? 'g' : next == '?' ? 'a' : '\0';
		break;
	case '=':
		if (v[*pos + 1] == '=')
			two = '=';
		break;
	default:
		return 0;
	}

	if (two != '\0') {
		*res = two;
		*pos += 2;
	} else
		*pos += 1;

	return *res;
}
2796
2797/*
2798 * Evaluate either a parenthesized numeric expression
2799 * or a single signed integer number.
2800 */
2801static int
2802roff_evalpar(struct roff *r, int ln,
2803	const char *v, int *pos, int *res, int flags)
2804{
2805
2806	if ('(' != v[*pos])
2807		return roff_getnum(v, pos, res, flags);
2808
2809	(*pos)++;
2810	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2811		return 0;
2812
2813	/*
2814	 * Omission of the closing parenthesis
2815	 * is an error in validation mode,
2816	 * but ignored in evaluation mode.
2817	 */
2818
2819	if (')' == v[*pos])
2820		(*pos)++;
2821	else if (NULL == res)
2822		return 0;
2823
2824	return 1;
2825}
2826
2827/*
2828 * Evaluate a complete numeric expression.
2829 * Proceed left to right, there is no concept of precedence.
2830 */
2831static int
2832roff_evalnum(struct roff *r, int ln, const char *v,
2833	int *pos, int *res, int flags)
2834{
2835	int		 mypos, operand2;
2836	char		 operator;
2837
2838	if (NULL == pos) {
2839		mypos = 0;
2840		pos = &mypos;
2841	}
2842
2843	if (flags & ROFFNUM_WHITE)
2844		while (isspace((unsigned char)v[*pos]))
2845			(*pos)++;
2846
2847	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2848		return 0;
2849
2850	while (1) {
2851		if (flags & ROFFNUM_WHITE)
2852			while (isspace((unsigned char)v[*pos]))
2853				(*pos)++;
2854
2855		if ( ! roff_getop(v, pos, &operator))
2856			break;
2857
2858		if (flags & ROFFNUM_WHITE)
2859			while (isspace((unsigned char)v[*pos]))
2860				(*pos)++;
2861
2862		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2863			return 0;
2864
2865		if (flags & ROFFNUM_WHITE)
2866			while (isspace((unsigned char)v[*pos]))
2867				(*pos)++;
2868
2869		if (NULL == res)
2870			continue;
2871
2872		switch (operator) {
2873		case '+':
2874			*res += operand2;
2875			break;
2876		case '-':
2877			*res -= operand2;
2878			break;
2879		case '*':
2880			*res *= operand2;
2881			break;
2882		case '/':
2883			if (operand2 == 0) {
2884				mandoc_msg(MANDOCERR_DIVZERO,
2885					ln, *pos, "%s", v);
2886				*res = 0;
2887				break;
2888			}
2889			*res /= operand2;
2890			break;
2891		case '%':
2892			if (operand2 == 0) {
2893				mandoc_msg(MANDOCERR_DIVZERO,
2894					ln, *pos, "%s", v);
2895				*res = 0;
2896				break;
2897			}
2898			*res %= operand2;
2899			break;
2900		case '<':
2901			*res = *res < operand2;
2902			break;
2903		case '>':
2904			*res = *res > operand2;
2905			break;
2906		case 'l':
2907			*res = *res <= operand2;
2908			break;
2909		case 'g':
2910			*res = *res >= operand2;
2911			break;
2912		case '=':
2913			*res = *res == operand2;
2914			break;
2915		case '!':
2916			*res = *res != operand2;
2917			break;
2918		case '&':
2919			*res = *res && operand2;
2920			break;
2921		case ':':
2922			*res = *res || operand2;
2923			break;
2924		case 'i':
2925			if (operand2 < *res)
2926				*res = operand2;
2927			break;
2928		case 'a':
2929			if (operand2 > *res)
2930				*res = operand2;
2931			break;
2932		default:
2933			abort();
2934		}
2935	}
2936	return 1;
2937}
2938
2939/* --- register management ------------------------------------------------ */
2940
/*
 * Set the register with the NUL-terminated name to val,
 * or add val to it or subtract val from it if sign is
 * '+' or '-', respectively.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2946
2947static void
2948roff_setregn(struct roff *r, const char *name, size_t len,
2949    int val, char sign, int step)
2950{
2951	struct roffreg	*reg;
2952
2953	/* Search for an existing register with the same name. */
2954	reg = r->regtab;
2955
2956	while (reg != NULL && (reg->key.sz != len ||
2957	    strncmp(reg->key.p, name, len) != 0))
2958		reg = reg->next;
2959
2960	if (NULL == reg) {
2961		/* Create a new register. */
2962		reg = mandoc_malloc(sizeof(struct roffreg));
2963		reg->key.p = mandoc_strndup(name, len);
2964		reg->key.sz = len;
2965		reg->val = 0;
2966		reg->step = 0;
2967		reg->next = r->regtab;
2968		r->regtab = reg;
2969	}
2970
2971	if ('+' == sign)
2972		reg->val += val;
2973	else if ('-' == sign)
2974		reg->val -= val;
2975	else
2976		reg->val = val;
2977	if (step != INT_MIN)
2978		reg->step = step;
2979}
2980
2981/*
2982 * Handle some predefined read-only number registers.
2983 * For now, return -1 if the requested register is not predefined;
2984 * in case a predefined read-only register having the value -1
2985 * were to turn up, another special value would have to be chosen.
2986 */
2987static int
2988roff_getregro(const struct roff *r, const char *name)
2989{
2990
2991	switch (*name) {
2992	case '$':  /* Number of arguments of the last macro evaluated. */
2993		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2994	case 'A':  /* ASCII approximation mode is always off. */
2995		return 0;
2996	case 'g':  /* Groff compatibility mode is always on. */
2997		return 1;
2998	case 'H':  /* Fixed horizontal resolution. */
2999		return 24;
3000	case 'j':  /* Always adjust left margin only. */
3001		return 0;
3002	case 'T':  /* Some output device is always defined. */
3003		return 1;
3004	case 'V':  /* Fixed vertical resolution. */
3005		return 40;
3006	default:
3007		return -1;
3008	}
3009}
3010
/*
 * Return the value of the register with the NUL-terminated name,
 * without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3016
3017static int
3018roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3019{
3020	struct roffreg	*reg;
3021	int		 val;
3022
3023	if ('.' == name[0] && 2 == len) {
3024		val = roff_getregro(r, name + 1);
3025		if (-1 != val)
3026			return val;
3027	}
3028
3029	for (reg = r->regtab; reg; reg = reg->next) {
3030		if (len == reg->key.sz &&
3031		    0 == strncmp(name, reg->key.p, len)) {
3032			switch (sign) {
3033			case '+':
3034				reg->val += reg->step;
3035				break;
3036			case '-':
3037				reg->val -= reg->step;
3038				break;
3039			default:
3040				break;
3041			}
3042			return reg->val;
3043		}
3044	}
3045
3046	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3047	return 0;
3048}
3049
3050static int
3051roff_hasregn(const struct roff *r, const char *name, size_t len)
3052{
3053	struct roffreg	*reg;
3054	int		 val;
3055
3056	if ('.' == name[0] && 2 == len) {
3057		val = roff_getregro(r, name + 1);
3058		if (-1 != val)
3059			return 1;
3060	}
3061
3062	for (reg = r->regtab; reg; reg = reg->next)
3063		if (len == reg->key.sz &&
3064		    0 == strncmp(name, reg->key.p, len))
3065			return 1;
3066
3067	return 0;
3068}
3069
3070static void
3071roff_freereg(struct roffreg *reg)
3072{
3073	struct roffreg	*old_reg;
3074
3075	while (NULL != reg) {
3076		free(reg->key.p);
3077		old_reg = reg;
3078		reg = reg->next;
3079		free(old_reg);
3080	}
3081}
3082
3083static int
3084roff_nr(ROFF_ARGS)
3085{
3086	char		*key, *val, *step;
3087	size_t		 keysz;
3088	int		 iv, is, len;
3089	char		 sign;
3090
3091	key = val = buf->buf + pos;
3092	if (*key == '\0')
3093		return ROFF_IGN;
3094
3095	keysz = roff_getname(r, &val, ln, pos);
3096	if (key[keysz] == '\\' || key[keysz] == '\t')
3097		return ROFF_IGN;
3098
3099	sign = *val;
3100	if (sign == '+' || sign == '-')
3101		val++;
3102
3103	len = 0;
3104	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3105		return ROFF_IGN;
3106
3107	step = val + len;
3108	while (isspace((unsigned char)*step))
3109		step++;
3110	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3111		is = INT_MIN;
3112
3113	roff_setregn(r, key, keysz, iv, sign, is);
3114	return ROFF_IGN;
3115}
3116
3117static int
3118roff_rr(ROFF_ARGS)
3119{
3120	struct roffreg	*reg, **prev;
3121	char		*name, *cp;
3122	size_t		 namesz;
3123
3124	name = cp = buf->buf + pos;
3125	if (*name == '\0')
3126		return ROFF_IGN;
3127	namesz = roff_getname(r, &cp, ln, pos);
3128	name[namesz] = '\0';
3129
3130	prev = &r->regtab;
3131	while (1) {
3132		reg = *prev;
3133		if (reg == NULL || !strcmp(name, reg->key.p))
3134			break;
3135		prev = &reg->next;
3136	}
3137	if (reg != NULL) {
3138		*prev = reg->next;
3139		free(reg->key.p);
3140		free(reg);
3141	}
3142	return ROFF_IGN;
3143}
3144
3145/* --- handler functions for roff requests -------------------------------- */
3146
3147static int
3148roff_rm(ROFF_ARGS)
3149{
3150	const char	 *name;
3151	char		 *cp;
3152	size_t		  namesz;
3153
3154	cp = buf->buf + pos;
3155	while (*cp != '\0') {
3156		name = cp;
3157		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3158		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3159		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3160		if (name[namesz] == '\\' || name[namesz] == '\t')
3161			break;
3162	}
3163	return ROFF_IGN;
3164}
3165
3166static int
3167roff_it(ROFF_ARGS)
3168{
3169	int		 iv;
3170
3171	/* Parse the number of lines. */
3172
3173	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3174		mandoc_msg(MANDOCERR_IT_NONUM,
3175		    ln, ppos, "%s", buf->buf + 1);
3176		return ROFF_IGN;
3177	}
3178
3179	while (isspace((unsigned char)buf->buf[pos]))
3180		pos++;
3181
3182	/*
3183	 * Arm the input line trap.
3184	 * Special-casing "an-trap" is an ugly workaround to cope
3185	 * with DocBook stupidly fiddling with man(7) internals.
3186	 */
3187
3188	roffit_lines = iv;
3189	roffit_macro = mandoc_strdup(iv != 1 ||
3190	    strcmp(buf->buf + pos, "an-trap") ?
3191	    buf->buf + pos : "br");
3192	return ROFF_IGN;
3193}
3194
3195static int
3196roff_Dd(ROFF_ARGS)
3197{
3198	int		 mask;
3199	enum roff_tok	 t, te;
3200
3201	switch (tok) {
3202	case ROFF_Dd:
3203		tok = MDOC_Dd;
3204		te = MDOC_MAX;
3205		if (r->format == 0)
3206			r->format = MPARSE_MDOC;
3207		mask = MPARSE_MDOC | MPARSE_QUICK;
3208		break;
3209	case ROFF_TH:
3210		tok = MAN_TH;
3211		te = MAN_MAX;
3212		if (r->format == 0)
3213			r->format = MPARSE_MAN;
3214		mask = MPARSE_QUICK;
3215		break;
3216	default:
3217		abort();
3218	}
3219	if ((r->options & mask) == 0)
3220		for (t = tok; t < te; t++)
3221			roff_setstr(r, roff_name[t], NULL, 0);
3222	return ROFF_CONT;
3223}
3224
3225static int
3226roff_TE(ROFF_ARGS)
3227{
3228	r->man->flags &= ~ROFF_NONOFILL;
3229	if (r->tbl == NULL) {
3230		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3231		return ROFF_IGN;
3232	}
3233	if (tbl_end(r->tbl, 0) == 0) {
3234		r->tbl = NULL;
3235		free(buf->buf);
3236		buf->buf = mandoc_strdup(".sp");
3237		buf->sz = 4;
3238		*offs = 0;
3239		return ROFF_REPARSE;
3240	}
3241	r->tbl = NULL;
3242	return ROFF_IGN;
3243}
3244
3245static int
3246roff_T_(ROFF_ARGS)
3247{
3248
3249	if (NULL == r->tbl)
3250		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3251	else
3252		tbl_restart(ln, ppos, r->tbl);
3253
3254	return ROFF_IGN;
3255}
3256
3257/*
3258 * Handle in-line equation delimiters.
3259 */
3260static int
3261roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3262{
3263	char		*cp1, *cp2;
3264	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3265
3266	/*
3267	 * Outside equations, look for an opening delimiter.
3268	 * If we are inside an equation, we already know it is
3269	 * in-line, or this function wouldn't have been called;
3270	 * so look for a closing delimiter.
3271	 */
3272
3273	cp1 = buf->buf + pos;
3274	cp2 = strchr(cp1, r->eqn == NULL ?
3275	    r->last_eqn->odelim : r->last_eqn->cdelim);
3276	if (cp2 == NULL)
3277		return ROFF_CONT;
3278
3279	*cp2++ = '\0';
3280	bef_pr = bef_nl = aft_nl = aft_pr = "";
3281
3282	/* Handle preceding text, protecting whitespace. */
3283
3284	if (*buf->buf != '\0') {
3285		if (r->eqn == NULL)
3286			bef_pr = "\\&";
3287		bef_nl = "\n";
3288	}
3289
3290	/*
3291	 * Prepare replacing the delimiter with an equation macro
3292	 * and drop leading white space from the equation.
3293	 */
3294
3295	if (r->eqn == NULL) {
3296		while (*cp2 == ' ')
3297			cp2++;
3298		mac = ".EQ";
3299	} else
3300		mac = ".EN";
3301
3302	/* Handle following text, protecting whitespace. */
3303
3304	if (*cp2 != '\0') {
3305		aft_nl = "\n";
3306		if (r->eqn != NULL)
3307			aft_pr = "\\&";
3308	}
3309
3310	/* Do the actual replacement. */
3311
3312	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3313	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3314	free(buf->buf);
3315	buf->buf = cp1;
3316
3317	/* Toggle the in-line state of the eqn subsystem. */
3318
3319	r->eqn_inline = r->eqn == NULL;
3320	return ROFF_REPARSE;
3321}
3322
/*
 * Handle the `EQ' macro starting an equation:
 * append a new equation node to the syntax tree and make the
 * equation parser state, which is reused from one equation
 * to the next, the currently open equation.
 * Arguments to the macro are reported and skipped.
 */
static int
roff_EQ(ROFF_ARGS)
{
	struct roff_node	*n;

	if (r->man->meta.macroset == MACROSET_MAN)
		man_breakscope(r->man, ROFF_EQ);
	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
	/* Mark the node if it is on a later line than the last node. */
	if (ln > r->man->last->line)
		n->flags |= NODE_LINE;
	n->eqn = eqn_box_new();
	roff_node_append(r->man, n);
	r->man->next = ROFF_NEXT_SIBLING;

	/* Allocate the parser state once, then reset it for reuse. */
	assert(r->eqn == NULL);
	if (r->last_eqn == NULL)
		r->last_eqn = eqn_alloc();
	else
		eqn_reset(r->last_eqn);
	r->eqn = r->last_eqn;
	r->eqn->node = n;

	if (buf->buf[pos] != '\0')
		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
		    ".EQ %s", buf->buf + pos);

	return ROFF_IGN;
}
3351
3352static int
3353roff_EN(ROFF_ARGS)
3354{
3355	if (r->eqn != NULL) {
3356		eqn_parse(r->eqn);
3357		r->eqn = NULL;
3358	} else
3359		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3360	if (buf->buf[pos] != '\0')
3361		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3362		    "EN %s", buf->buf + pos);
3363	return ROFF_IGN;
3364}
3365
/*
 * Handle the `TS' macro starting a table:
 * allocate a new table and make it the currently open one.
 * A table that is still open is reported and ended first.
 */
static int
roff_TS(ROFF_ARGS)
{
	if (r->tbl != NULL) {
		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
		tbl_end(r->tbl, 0);
	}
	r->man->flags |= ROFF_NONOFILL;
	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
	/* Remember the first table and always track the latest one. */
	if (r->last_tbl == NULL)
		r->first_tbl = r->tbl;
	r->last_tbl = r->tbl;
	return ROFF_IGN;
}
3380
3381static int
3382roff_noarg(ROFF_ARGS)
3383{
3384	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3385		man_breakscope(r->man, tok);
3386	if (tok == ROFF_brp)
3387		tok = ROFF_br;
3388	roff_elem_alloc(r->man, ln, ppos, tok);
3389	if (buf->buf[pos] != '\0')
3390		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3391		   "%s %s", roff_name[tok], buf->buf + pos);
3392	if (tok == ROFF_nf)
3393		r->man->flags |= ROFF_NOFILL;
3394	else if (tok == ROFF_fi)
3395		r->man->flags &= ~ROFF_NOFILL;
3396	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3397	r->man->next = ROFF_NEXT_SIBLING;
3398	return ROFF_IGN;
3399}
3400
/*
 * Handle requests that take at most one argument and result in
 * an element node in the syntax tree.  The first word on the line,
 * if any, becomes a child of the element; excess arguments are
 * reported.  For `ce' and `rj', the argument is evaluated as the
 * number of lines to be affected, defaulting to 1.
 */
static int
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/* A new `ce' or `rj' goes after one that is still pending. */
	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;

	cp = buf->buf + pos;
	if (*cp != '\0') {
		/* Terminate the first word, overwriting the blanks. */
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_msg(MANDOCERR_ARG_EXCESS,
			    ln, (int)(cp - buf->buf),
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/* Without an argument, supply a default of one line. */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}
		npos = 0;
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_msg(MANDOCERR_CE_NONUM,
			    ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}
		/*
		 * A count below one leaves no request pending;
		 * otherwise, remember the element node.
		 */
		if (roffce_lines < 1) {
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* All other requests are complete right away. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
3460
3461static int
3462roff_manyarg(ROFF_ARGS)
3463{
3464	struct roff_node	*n;
3465	char			*sp, *ep;
3466
3467	roff_elem_alloc(r->man, ln, ppos, tok);
3468	n = r->man->last;
3469
3470	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3471		while (*ep != '\0' && *ep != ' ')
3472			ep++;
3473		while (*ep == ' ')
3474			*ep++ = '\0';
3475		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3476	}
3477
3478	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3479	r->man->last = n;
3480	r->man->next = ROFF_NEXT_SIBLING;
3481	return ROFF_IGN;
3482}
3483
3484static int
3485roff_als(ROFF_ARGS)
3486{
3487	char		*oldn, *newn, *end, *value;
3488	size_t		 oldsz, newsz, valsz;
3489
3490	newn = oldn = buf->buf + pos;
3491	if (*newn == '\0')
3492		return ROFF_IGN;
3493
3494	newsz = roff_getname(r, &oldn, ln, pos);
3495	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3496		return ROFF_IGN;
3497
3498	end = oldn;
3499	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3500	if (oldsz == 0)
3501		return ROFF_IGN;
3502
3503	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3504	    (int)oldsz, oldn);
3505	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3506	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3507	free(value);
3508	return ROFF_IGN;
3509}
3510
3511/*
3512 * The .break request only makes sense inside conditionals,
3513 * and that case is already handled in roff_cond_sub().
3514 */
3515static int
3516roff_break(ROFF_ARGS)
3517{
3518	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3519	return ROFF_IGN;
3520}
3521
3522static int
3523roff_cc(ROFF_ARGS)
3524{
3525	const char	*p;
3526
3527	p = buf->buf + pos;
3528
3529	if (*p == '\0' || (r->control = *p++) == '.')
3530		r->control = '\0';
3531
3532	if (*p != '\0')
3533		mandoc_msg(MANDOCERR_ARG_EXCESS,
3534		    ln, p - buf->buf, "cc ... %s", p);
3535
3536	return ROFF_IGN;
3537}
3538
3539static int
3540roff_char(ROFF_ARGS)
3541{
3542	const char	*p, *kp, *vp;
3543	size_t		 ksz, vsz;
3544	int		 font;
3545
3546	/* Parse the character to be replaced. */
3547
3548	kp = buf->buf + pos;
3549	p = kp + 1;
3550	if (*kp == '\0' || (*kp == '\\' &&
3551	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3552	    (*p != ' ' && *p != '\0')) {
3553		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3554		return ROFF_IGN;
3555	}
3556	ksz = p - kp;
3557	while (*p == ' ')
3558		p++;
3559
3560	/*
3561	 * If the replacement string contains a font escape sequence,
3562	 * we have to restore the font at the end.
3563	 */
3564
3565	vp = p;
3566	vsz = strlen(p);
3567	font = 0;
3568	while (*p != '\0') {
3569		if (*p++ != '\\')
3570			continue;
3571		switch (mandoc_escape(&p, NULL, NULL)) {
3572		case ESCAPE_FONT:
3573		case ESCAPE_FONTROMAN:
3574		case ESCAPE_FONTITALIC:
3575		case ESCAPE_FONTBOLD:
3576		case ESCAPE_FONTBI:
3577		case ESCAPE_FONTCW:
3578		case ESCAPE_FONTPREV:
3579			font++;
3580			break;
3581		default:
3582			break;
3583		}
3584	}
3585	if (font > 1)
3586		mandoc_msg(MANDOCERR_CHAR_FONT,
3587		    ln, (int)(vp - buf->buf), "%s", vp);
3588
3589	/*
3590	 * Approximate the effect of .char using the .tr tables.
3591	 * XXX In groff, .char and .tr interact differently.
3592	 */
3593
3594	if (ksz == 1) {
3595		if (r->xtab == NULL)
3596			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3597		assert((unsigned int)*kp < 128);
3598		free(r->xtab[(int)*kp].p);
3599		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3600		    "%s%s", vp, font ? "\fP" : "");
3601	} else {
3602		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3603		if (font)
3604			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3605	}
3606	return ROFF_IGN;
3607}
3608
3609static int
3610roff_ec(ROFF_ARGS)
3611{
3612	const char	*p;
3613
3614	p = buf->buf + pos;
3615	if (*p == '\0')
3616		r->escape = '\\';
3617	else {
3618		r->escape = *p;
3619		if (*++p != '\0')
3620			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3621			    (int)(p - buf->buf), "ec ... %s", p);
3622	}
3623	return ROFF_IGN;
3624}
3625
3626static int
3627roff_eo(ROFF_ARGS)
3628{
3629	r->escape = '\0';
3630	if (buf->buf[pos] != '\0')
3631		mandoc_msg(MANDOCERR_ARG_SKIP,
3632		    ln, pos, "eo %s", buf->buf + pos);
3633	return ROFF_IGN;
3634}
3635
3636static int
3637roff_nop(ROFF_ARGS)
3638{
3639	while (buf->buf[pos] == ' ')
3640		pos++;
3641	*offs = pos;
3642	return ROFF_RERUN;
3643}
3644
3645static int
3646roff_tr(ROFF_ARGS)
3647{
3648	const char	*p, *first, *second;
3649	size_t		 fsz, ssz;
3650	enum mandoc_esc	 esc;
3651
3652	p = buf->buf + pos;
3653
3654	if (*p == '\0') {
3655		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3656		return ROFF_IGN;
3657	}
3658
3659	while (*p != '\0') {
3660		fsz = ssz = 1;
3661
3662		first = p++;
3663		if (*first == '\\') {
3664			esc = mandoc_escape(&p, NULL, NULL);
3665			if (esc == ESCAPE_ERROR) {
3666				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3667				    (int)(p - buf->buf), "%s", first);
3668				return ROFF_IGN;
3669			}
3670			fsz = (size_t)(p - first);
3671		}
3672
3673		second = p++;
3674		if (*second == '\\') {
3675			esc = mandoc_escape(&p, NULL, NULL);
3676			if (esc == ESCAPE_ERROR) {
3677				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3678				    (int)(p - buf->buf), "%s", second);
3679				return ROFF_IGN;
3680			}
3681			ssz = (size_t)(p - second);
3682		} else if (*second == '\0') {
3683			mandoc_msg(MANDOCERR_TR_ODD, ln,
3684			    (int)(first - buf->buf), "tr %s", first);
3685			second = " ";
3686			p--;
3687		}
3688
3689		if (fsz > 1) {
3690			roff_setstrn(&r->xmbtab, first, fsz,
3691			    second, ssz, 0);
3692			continue;
3693		}
3694
3695		if (r->xtab == NULL)
3696			r->xtab = mandoc_calloc(128,
3697			    sizeof(struct roffstr));
3698
3699		free(r->xtab[(int)*first].p);
3700		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3701		r->xtab[(int)*first].sz = ssz;
3702	}
3703
3704	return ROFF_IGN;
3705}
3706
3707/*
3708 * Implementation of the .return request.
3709 * There is no need to call roff_userret() from here.
3710 * The read module will call that after rewinding the reader stack
3711 * to the place from where the current macro was called.
3712 */
3713static int
3714roff_return(ROFF_ARGS)
3715{
3716	if (r->mstackpos >= 0)
3717		return ROFF_IGN | ROFF_USERRET;
3718
3719	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3720	return ROFF_IGN;
3721}
3722
3723static int
3724roff_rn(ROFF_ARGS)
3725{
3726	const char	*value;
3727	char		*oldn, *newn, *end;
3728	size_t		 oldsz, newsz;
3729	int		 deftype;
3730
3731	oldn = newn = buf->buf + pos;
3732	if (*oldn == '\0')
3733		return ROFF_IGN;
3734
3735	oldsz = roff_getname(r, &newn, ln, pos);
3736	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3737		return ROFF_IGN;
3738
3739	end = newn;
3740	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3741	if (newsz == 0)
3742		return ROFF_IGN;
3743
3744	deftype = ROFFDEF_ANY;
3745	value = roff_getstrn(r, oldn, oldsz, &deftype);
3746	switch (deftype) {
3747	case ROFFDEF_USER:
3748		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3749		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3750		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3751		break;
3752	case ROFFDEF_PRE:
3753		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3754		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3755		break;
3756	case ROFFDEF_REN:
3757		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3758		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3759		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3760		break;
3761	case ROFFDEF_STD:
3762		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3763		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3764		break;
3765	default:
3766		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3767		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3768		break;
3769	}
3770	return ROFF_IGN;
3771}
3772
3773static int
3774roff_shift(ROFF_ARGS)
3775{
3776	struct mctx	*ctx;
3777	int		 levels, i;
3778
3779	levels = 1;
3780	if (buf->buf[pos] != '\0' &&
3781	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3782		mandoc_msg(MANDOCERR_CE_NONUM,
3783		    ln, pos, "shift %s", buf->buf + pos);
3784		levels = 1;
3785	}
3786	if (r->mstackpos < 0) {
3787		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3788		return ROFF_IGN;
3789	}
3790	ctx = r->mstack + r->mstackpos;
3791	if (levels > ctx->argc) {
3792		mandoc_msg(MANDOCERR_SHIFT,
3793		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3794		levels = ctx->argc;
3795	}
3796	if (levels == 0)
3797		return ROFF_IGN;
3798	for (i = 0; i < levels; i++)
3799		free(ctx->argv[i]);
3800	ctx->argc -= levels;
3801	for (i = 0; i < ctx->argc; i++)
3802		ctx->argv[i] = ctx->argv[i + levels];
3803	return ROFF_IGN;
3804}
3805
3806static int
3807roff_so(ROFF_ARGS)
3808{
3809	char *name, *cp;
3810
3811	name = buf->buf + pos;
3812	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3813
3814	/*
3815	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3816	 * opening anything that's not in our cwd or anything beneath
3817	 * it.  Thus, explicitly disallow traversing up the file-system
3818	 * or using absolute paths.
3819	 */
3820
3821	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3822		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3823		buf->sz = mandoc_asprintf(&cp,
3824		    ".sp\nSee the file %s.\n.sp", name) + 1;
3825		free(buf->buf);
3826		buf->buf = cp;
3827		*offs = 0;
3828		return ROFF_REPARSE;
3829	}
3830
3831	*offs = pos;
3832	return ROFF_SO;
3833}
3834
3835/* --- user defined strings and macros ------------------------------------ */
3836
3837static int
3838roff_userdef(ROFF_ARGS)
3839{
3840	struct mctx	 *ctx;
3841	char		 *arg, *ap, *dst, *src;
3842	size_t		  sz;
3843
3844	/* If the macro is empty, ignore it altogether. */
3845
3846	if (*r->current_string == '\0')
3847		return ROFF_IGN;
3848
3849	/* Initialize a new macro stack context. */
3850
3851	if (++r->mstackpos == r->mstacksz) {
3852		r->mstack = mandoc_recallocarray(r->mstack,
3853		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3854		r->mstacksz += 8;
3855	}
3856	ctx = r->mstack + r->mstackpos;
3857	ctx->argsz = 0;
3858	ctx->argc = 0;
3859	ctx->argv = NULL;
3860
3861	/*
3862	 * Collect pointers to macro argument strings,
3863	 * NUL-terminating them and escaping quotes.
3864	 */
3865
3866	src = buf->buf + pos;
3867	while (*src != '\0') {
3868		if (ctx->argc == ctx->argsz) {
3869			ctx->argsz += 8;
3870			ctx->argv = mandoc_reallocarray(ctx->argv,
3871			    ctx->argsz, sizeof(*ctx->argv));
3872		}
3873		arg = roff_getarg(r, &src, ln, &pos);
3874		sz = 1;  /* For the terminating NUL. */
3875		for (ap = arg; *ap != '\0'; ap++)
3876			sz += *ap == '"' ? 4 : 1;
3877		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3878		for (ap = arg; *ap != '\0'; ap++) {
3879			if (*ap == '"') {
3880				memcpy(dst, "\\(dq", 4);
3881				dst += 4;
3882			} else
3883				*dst++ = *ap;
3884		}
3885		*dst = '\0';
3886		free(arg);
3887	}
3888
3889	/* Replace the macro invocation by the macro definition. */
3890
3891	free(buf->buf);
3892	buf->buf = mandoc_strdup(r->current_string);
3893	buf->sz = strlen(buf->buf) + 1;
3894	*offs = 0;
3895
3896	return buf->buf[buf->sz - 2] == '\n' ?
3897	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3898}
3899
3900/*
3901 * Calling a high-level macro that was renamed with .rn.
3902 * r->current_string has already been set up by roff_parse().
3903 */
3904static int
3905roff_renamed(ROFF_ARGS)
3906{
3907	char	*nbuf;
3908
3909	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3910	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3911	free(buf->buf);
3912	buf->buf = nbuf;
3913	*offs = 0;
3914	return ROFF_CONT;
3915}
3916
3917/*
3918 * Measure the length in bytes of the roff identifier at *cpp
3919 * and advance the pointer to the next word.
3920 */
3921static size_t
3922roff_getname(struct roff *r, char **cpp, int ln, int pos)
3923{
3924	char	 *name, *cp;
3925	size_t	  namesz;
3926
3927	name = *cpp;
3928	if (*name == '\0')
3929		return 0;
3930
3931	/* Advance cp to the byte after the end of the name. */
3932
3933	for (cp = name; 1; cp++) {
3934		namesz = cp - name;
3935		if (*cp == '\0')
3936			break;
3937		if (*cp == ' ' || *cp == '\t') {
3938			cp++;
3939			break;
3940		}
3941		if (*cp != '\\')
3942			continue;
3943		if (cp[1] == '{' || cp[1] == '}')
3944			break;
3945		if (*++cp == '\\')
3946			continue;
3947		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3948		    "%.*s", (int)(cp - name + 1), name);
3949		mandoc_escape((const char **)&cp, NULL, NULL);
3950		break;
3951	}
3952
3953	/* Read past spaces. */
3954
3955	while (*cp == ' ')
3956		cp++;
3957
3958	*cpp = cp;
3959	return namesz;
3960}
3961
3962/*
3963 * Store *string into the user-defined string called *name.
3964 * To clear an existing entry, call with (*r, *name, NULL, 0).
3965 * append == 0: replace mode
3966 * append == 1: single-line append mode
3967 * append == 2: multiline append mode, append '\n' after each call
3968 */
3969static void
3970roff_setstr(struct roff *r, const char *name, const char *string,
3971	int append)
3972{
3973	size_t	 namesz;
3974
3975	namesz = strlen(name);
3976	roff_setstrn(&r->strtab, name, namesz, string,
3977	    string ? strlen(string) : 0, append);
3978	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3979}
3980
3981static void
3982roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3983		const char *string, size_t stringsz, int append)
3984{
3985	struct roffkv	*n;
3986	char		*c;
3987	int		 i;
3988	size_t		 oldch, newch;
3989
3990	/* Search for an existing string with the same name. */
3991	n = *r;
3992
3993	while (n && (namesz != n->key.sz ||
3994			strncmp(n->key.p, name, namesz)))
3995		n = n->next;
3996
3997	if (NULL == n) {
3998		/* Create a new string table entry. */
3999		n = mandoc_malloc(sizeof(struct roffkv));
4000		n->key.p = mandoc_strndup(name, namesz);
4001		n->key.sz = namesz;
4002		n->val.p = NULL;
4003		n->val.sz = 0;
4004		n->next = *r;
4005		*r = n;
4006	} else if (0 == append) {
4007		free(n->val.p);
4008		n->val.p = NULL;
4009		n->val.sz = 0;
4010	}
4011
4012	if (NULL == string)
4013		return;
4014
4015	/*
4016	 * One additional byte for the '\n' in multiline mode,
4017	 * and one for the terminating '\0'.
4018	 */
4019	newch = stringsz + (1 < append ? 2u : 1u);
4020
4021	if (NULL == n->val.p) {
4022		n->val.p = mandoc_malloc(newch);
4023		*n->val.p = '\0';
4024		oldch = 0;
4025	} else {
4026		oldch = n->val.sz;
4027		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4028	}
4029
4030	/* Skip existing content in the destination buffer. */
4031	c = n->val.p + (int)oldch;
4032
4033	/* Append new content to the destination buffer. */
4034	i = 0;
4035	while (i < (int)stringsz) {
4036		/*
4037		 * Rudimentary roff copy mode:
4038		 * Handle escaped backslashes.
4039		 */
4040		if ('\\' == string[i] && '\\' == string[i + 1])
4041			i++;
4042		*c++ = string[i++];
4043	}
4044
4045	/* Append terminating bytes. */
4046	if (1 < append)
4047		*c++ = '\n';
4048
4049	*c = '\0';
4050	n->val.sz = (int)(c - n->val.p);
4051}
4052
4053static const char *
4054roff_getstrn(struct roff *r, const char *name, size_t len,
4055    int *deftype)
4056{
4057	const struct roffkv	*n;
4058	int			 found, i;
4059	enum roff_tok		 tok;
4060
4061	found = 0;
4062	for (n = r->strtab; n != NULL; n = n->next) {
4063		if (strncmp(name, n->key.p, len) != 0 ||
4064		    n->key.p[len] != '\0' || n->val.p == NULL)
4065			continue;
4066		if (*deftype & ROFFDEF_USER) {
4067			*deftype = ROFFDEF_USER;
4068			return n->val.p;
4069		} else {
4070			found = 1;
4071			break;
4072		}
4073	}
4074	for (n = r->rentab; n != NULL; n = n->next) {
4075		if (strncmp(name, n->key.p, len) != 0 ||
4076		    n->key.p[len] != '\0' || n->val.p == NULL)
4077			continue;
4078		if (*deftype & ROFFDEF_REN) {
4079			*deftype = ROFFDEF_REN;
4080			return n->val.p;
4081		} else {
4082			found = 1;
4083			break;
4084		}
4085	}
4086	for (i = 0; i < PREDEFS_MAX; i++) {
4087		if (strncmp(name, predefs[i].name, len) != 0 ||
4088		    predefs[i].name[len] != '\0')
4089			continue;
4090		if (*deftype & ROFFDEF_PRE) {
4091			*deftype = ROFFDEF_PRE;
4092			return predefs[i].str;
4093		} else {
4094			found = 1;
4095			break;
4096		}
4097	}
4098	if (r->man->meta.macroset != MACROSET_MAN) {
4099		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4100			if (strncmp(name, roff_name[tok], len) != 0 ||
4101			    roff_name[tok][len] != '\0')
4102				continue;
4103			if (*deftype & ROFFDEF_STD) {
4104				*deftype = ROFFDEF_STD;
4105				return NULL;
4106			} else {
4107				found = 1;
4108				break;
4109			}
4110		}
4111	}
4112	if (r->man->meta.macroset != MACROSET_MDOC) {
4113		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4114			if (strncmp(name, roff_name[tok], len) != 0 ||
4115			    roff_name[tok][len] != '\0')
4116				continue;
4117			if (*deftype & ROFFDEF_STD) {
4118				*deftype = ROFFDEF_STD;
4119				return NULL;
4120			} else {
4121				found = 1;
4122				break;
4123			}
4124		}
4125	}
4126
4127	if (found == 0 && *deftype != ROFFDEF_ANY) {
4128		if (*deftype & ROFFDEF_REN) {
4129			/*
4130			 * This might still be a request,
4131			 * so do not treat it as undefined yet.
4132			 */
4133			*deftype = ROFFDEF_UNDEF;
4134			return NULL;
4135		}
4136
4137		/* Using an undefined string defines it to be empty. */
4138
4139		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4140		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4141	}
4142
4143	*deftype = 0;
4144	return NULL;
4145}
4146
4147static void
4148roff_freestr(struct roffkv *r)
4149{
4150	struct roffkv	 *n, *nn;
4151
4152	for (n = r; n; n = nn) {
4153		free(n->key.p);
4154		free(n->val.p);
4155		nn = n->next;
4156		free(n);
4157	}
4158}
4159
4160/* --- accessors and utility functions ------------------------------------ */
4161
4162/*
4163 * Duplicate an input string, making the appropriate character
4164 * conversations (as stipulated by `tr') along the way.
4165 * Returns a heap-allocated string with all the replacements made.
4166 */
4167char *
4168roff_strdup(const struct roff *r, const char *p)
4169{
4170	const struct roffkv *cp;
4171	char		*res;
4172	const char	*pp;
4173	size_t		 ssz, sz;
4174	enum mandoc_esc	 esc;
4175
4176	if (NULL == r->xmbtab && NULL == r->xtab)
4177		return mandoc_strdup(p);
4178	else if ('\0' == *p)
4179		return mandoc_strdup("");
4180
4181	/*
4182	 * Step through each character looking for term matches
4183	 * (remember that a `tr' can be invoked with an escape, which is
4184	 * a glyph but the escape is multi-character).
4185	 * We only do this if the character hash has been initialised
4186	 * and the string is >0 length.
4187	 */
4188
4189	res = NULL;
4190	ssz = 0;
4191
4192	while ('\0' != *p) {
4193		assert((unsigned int)*p < 128);
4194		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4195			sz = r->xtab[(int)*p].sz;
4196			res = mandoc_realloc(res, ssz + sz + 1);
4197			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4198			ssz += sz;
4199			p++;
4200			continue;
4201		} else if ('\\' != *p) {
4202			res = mandoc_realloc(res, ssz + 2);
4203			res[ssz++] = *p++;
4204			continue;
4205		}
4206
4207		/* Search for term matches. */
4208		for (cp = r->xmbtab; cp; cp = cp->next)
4209			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4210				break;
4211
4212		if (NULL != cp) {
4213			/*
4214			 * A match has been found.
4215			 * Append the match to the array and move
4216			 * forward by its keysize.
4217			 */
4218			res = mandoc_realloc(res,
4219			    ssz + cp->val.sz + 1);
4220			memcpy(res + ssz, cp->val.p, cp->val.sz);
4221			ssz += cp->val.sz;
4222			p += (int)cp->key.sz;
4223			continue;
4224		}
4225
4226		/*
4227		 * Handle escapes carefully: we need to copy
4228		 * over just the escape itself, or else we might
4229		 * do replacements within the escape itself.
4230		 * Make sure to pass along the bogus string.
4231		 */
4232		pp = p++;
4233		esc = mandoc_escape(&p, NULL, NULL);
4234		if (ESCAPE_ERROR == esc) {
4235			sz = strlen(pp);
4236			res = mandoc_realloc(res, ssz + sz + 1);
4237			memcpy(res + ssz, pp, sz);
4238			break;
4239		}
4240		/*
4241		 * We bail out on bad escapes.
4242		 * No need to warn: we already did so when
4243		 * roff_expand() was called.
4244		 */
4245		sz = (int)(p - pp);
4246		res = mandoc_realloc(res, ssz + sz + 1);
4247		memcpy(res + ssz, pp, sz);
4248		ssz += sz;
4249	}
4250
4251	res[(int)ssz] = '\0';
4252	return res;
4253}
4254
4255int
4256roff_getformat(const struct roff *r)
4257{
4258
4259	return r->format;
4260}
4261
4262/*
4263 * Find out whether a line is a macro line or not.
4264 * If it is, adjust the current position and return one; if it isn't,
4265 * return zero and don't change the current position.
4266 * If the control character has been set with `.cc', then let that grain
4267 * precedence.
4268 * This is slighly contrary to groff, where using the non-breaking
4269 * control character when `cc' has been invoked will cause the
4270 * non-breaking macro contents to be printed verbatim.
4271 */
4272int
4273roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4274{
4275	int		pos;
4276
4277	pos = *ppos;
4278
4279	if (r->control != '\0' && cp[pos] == r->control)
4280		pos++;
4281	else if (r->control != '\0')
4282		return 0;
4283	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4284		pos += 2;
4285	else if ('.' == cp[pos] || '\'' == cp[pos])
4286		pos++;
4287	else
4288		return 0;
4289
4290	while (' ' == cp[pos] || '\t' == cp[pos])
4291		pos++;
4292
4293	*ppos = pos;
4294	return 1;
4295}
4296