1/*	$Id: roff.c,v 1.324 2017/07/14 17:16:16 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stddef.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc.h"
32#include "mandoc_aux.h"
33#include "mandoc_ohash.h"
34#include "roff.h"
35#include "libmandoc.h"
36#include "roff_int.h"
37#include "libroff.h"
38
/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several types can be combined
 * into one mask; ROFFDEF_ANY is the union of all four.
 * Note that the numbering starts at bit 1, bit 0 is not used here.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
49
50/* --- data types --------------------------------------------------------- */
51
/*
 * An incredibly-simple string buffer:
 * a heap pointer together with its cached length.
 * Used for both halves of a roffkv pair, for register names,
 * and for the entries of the single-byte translation table.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
59
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps three such lists: strtab, rentab, and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key; /* name used for lookup */
	struct roffstr	 val; /* contents associated with the name */
	struct roffkv	*next; /* next in list */
};
68
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is kept in the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current register value */
	struct roffreg	*next; /* next in list */
};
77
/*
 * Association of request and macro names with token IDs.
 * Records are allocated by roffhash_alloc() with enough trailing
 * space for the name including its terminating nil byte, and the
 * name member serves as the hash key (see the offsetof() there).
 */
struct	roffreq {
	enum roff_tok	 tok; /* token the name maps to */
	char		 name[]; /* nil-terminated name, flexible array */
};
85
/*
 * State of one roff parser instance.
 * Allocated by roff_alloc(), brought back to its initial state by
 * roff_reset(), and destroyed by roff_free().  Everything except
 * parse, man, reqtab, and options is released or cleared by
 * roff_free1().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr'), 128 entries */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, initialised to -1 */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character, '\0' after reset */
	char		 escape; /* escape character, initially '\\' */
};
112
/*
 * One entry on the stack of pending roff blocks.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(); name and end are owned by the entry and
 * freed together with it.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule, inherited from parent */
};
123
/*
 * Parameter list shared by all request handlers.
 * Using one macro keeps the prototypes and the definitions of the
 * handlers in sync with the roffproc function pointer type below.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler as stored in struct roffmac. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
133
/*
 * Handlers for one request or macro; the roffs[] table holds one
 * of these per token.  Handlers that do not apply are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
141
/*
 * One predefined string: a name and the text it stands for.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer including the trailing
 * comma; the predefs[] array is filled by including "predefs.in",
 * NOTE(review): which presumably consists of PREDEF() invocations.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
149
150/* --- function prototypes ------------------------------------------------ */
151
152static	void		 roffnode_cleanscope(struct roff *);
153static	void		 roffnode_pop(struct roff *);
154static	void		 roffnode_push(struct roff *, enum roff_tok,
155				const char *, int, int);
156static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
157static	enum rofferr	 roff_als(ROFF_ARGS);
158static	enum rofferr	 roff_block(ROFF_ARGS);
159static	enum rofferr	 roff_block_text(ROFF_ARGS);
160static	enum rofferr	 roff_block_sub(ROFF_ARGS);
161static	enum rofferr	 roff_br(ROFF_ARGS);
162static	enum rofferr	 roff_cblock(ROFF_ARGS);
163static	enum rofferr	 roff_cc(ROFF_ARGS);
164static	void		 roff_ccond(struct roff *, int, int);
165static	enum rofferr	 roff_cond(ROFF_ARGS);
166static	enum rofferr	 roff_cond_text(ROFF_ARGS);
167static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
168static	enum rofferr	 roff_ds(ROFF_ARGS);
169static	enum rofferr	 roff_ec(ROFF_ARGS);
170static	enum rofferr	 roff_eo(ROFF_ARGS);
171static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
172static	int		 roff_evalcond(struct roff *r, int, char *, int *);
173static	int		 roff_evalnum(struct roff *, int,
174				const char *, int *, int *, int);
175static	int		 roff_evalpar(struct roff *, int,
176				const char *, int *, int *, int);
177static	int		 roff_evalstrcond(const char *, int *);
178static	void		 roff_free1(struct roff *);
179static	void		 roff_freereg(struct roffreg *);
180static	void		 roff_freestr(struct roffkv *);
181static	size_t		 roff_getname(struct roff *, char **, int, int);
182static	int		 roff_getnum(const char *, int *, int *, int);
183static	int		 roff_getop(const char *, int *, char *);
184static	int		 roff_getregn(const struct roff *,
185				const char *, size_t);
186static	int		 roff_getregro(const struct roff *,
187				const char *name);
188static	const char	*roff_getstrn(const struct roff *,
189				const char *, size_t, int *);
190static	int		 roff_hasregn(const struct roff *,
191				const char *, size_t);
192static	enum rofferr	 roff_insec(ROFF_ARGS);
193static	enum rofferr	 roff_it(ROFF_ARGS);
194static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
195static	void		 roff_man_alloc1(struct roff_man *);
196static	void		 roff_man_free1(struct roff_man *);
197static	enum rofferr	 roff_manyarg(ROFF_ARGS);
198static	enum rofferr	 roff_nr(ROFF_ARGS);
199static	enum rofferr	 roff_onearg(ROFF_ARGS);
200static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
201				int, int);
202static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
203				int, int *);
204static	enum rofferr	 roff_renamed(ROFF_ARGS);
205static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
206static	enum rofferr	 roff_rm(ROFF_ARGS);
207static	enum rofferr	 roff_rn(ROFF_ARGS);
208static	enum rofferr	 roff_rr(ROFF_ARGS);
209static	void		 roff_setstr(struct roff *,
210				const char *, const char *, int);
211static	void		 roff_setstrn(struct roffkv **, const char *,
212				size_t, const char *, size_t, int);
213static	enum rofferr	 roff_so(ROFF_ARGS);
214static	enum rofferr	 roff_tr(ROFF_ARGS);
215static	enum rofferr	 roff_Dd(ROFF_ARGS);
216static	enum rofferr	 roff_TE(ROFF_ARGS);
217static	enum rofferr	 roff_TS(ROFF_ARGS);
218static	enum rofferr	 roff_EQ(ROFF_ARGS);
219static	enum rofferr	 roff_EN(ROFF_ARGS);
220static	enum rofferr	 roff_T_(ROFF_ARGS);
221static	enum rofferr	 roff_unsupp(ROFF_ARGS);
222static	enum rofferr	 roff_userdef(ROFF_ARGS);
223
224/* --- constant data ------------------------------------------------------ */
225
/* Flag bits modifying how numeric expressions are parsed. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
228
/*
 * Names of all requests and macros, indexed by token number.
 * NULL entries stand for tokens without a name; roffhash_alloc()
 * skips them when building a lookup table.
 * NOTE(review): entries are positional and presumably have to stay
 * in the same order as the token enumerations declared in the
 * headers -- confirm before inserting or moving anything.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"ft",		"ll",
	"mc",		"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	NULL,
	"TH",		"SH",		"SS",		"TP",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"nf",		"fi",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"OP",		"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the table above, used for all lookups by token. */
const	char *const *roff_name = __roff_name;
336
/*
 * Handler table for roff requests, one entry per token below
 * TOKEN_NONE.  Each entry lists the proc, text, and sub handlers
 * and the flags of struct roffmac; the trailing comment names the
 * request the entry belongs to.
 * NOTE(review): entries are positional and presumably indexed by
 * enum roff_tok, in the same order as the leading part of the
 * name table -- confirm before inserting or moving anything.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_br, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_br, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* renamed request or macro */
	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro */
};
581
/*
 * Array of injected predefined strings.
 * The initializers come from "predefs.in"; PREDEFS_MAX has to be
 * kept at least as large as the number of entries in that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
587
/*
 * File-global parser state that is not part of struct roff.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
592
593
594/* --- request table ------------------------------------------------------ */
595
596struct ohash *
597roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
598{
599	struct ohash	*htab;
600	struct roffreq	*req;
601	enum roff_tok	 tok;
602	size_t		 sz;
603	unsigned int	 slot;
604
605	htab = mandoc_malloc(sizeof(*htab));
606	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
607
608	for (tok = mintok; tok < maxtok; tok++) {
609		if (roff_name[tok] == NULL)
610			continue;
611		sz = strlen(roff_name[tok]);
612		req = mandoc_malloc(sizeof(*req) + sz + 1);
613		req->tok = tok;
614		memcpy(req->name, roff_name[tok], sz + 1);
615		slot = ohash_qlookup(htab, req->name);
616		ohash_insert(htab, slot, req);
617	}
618	return htab;
619}
620
/*
 * Release a table built by roffhash_alloc(): every record stored
 * in it, the internal storage of the table, and the table object
 * itself.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
635
636enum roff_tok
637roffhash_find(struct ohash *htab, const char *name, size_t sz)
638{
639	struct roffreq	*req;
640	const char	*end;
641
642	if (sz) {
643		end = name + sz;
644		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
645	} else
646		req = ohash_find(htab, ohash_qlookup(htab, name));
647	return req == NULL ? TOKEN_NONE : req->tok;
648}
649
650/* --- stack of request blocks -------------------------------------------- */
651
652/*
653 * Pop the current node off of the stack of roff instructions currently
654 * pending.
655 */
656static void
657roffnode_pop(struct roff *r)
658{
659	struct roffnode	*p;
660
661	assert(r->last);
662	p = r->last;
663
664	r->last = r->last->parent;
665	free(p->name);
666	free(p->end);
667	free(p);
668}
669
670/*
671 * Push a roff node onto the instruction stack.  This must later be
672 * removed with roffnode_pop().
673 */
674static void
675roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
676		int line, int col)
677{
678	struct roffnode	*p;
679
680	p = mandoc_calloc(1, sizeof(struct roffnode));
681	p->tok = tok;
682	if (name)
683		p->name = mandoc_strdup(name);
684	p->parent = r->last;
685	p->line = line;
686	p->col = col;
687	p->rule = p->parent ? p->parent->rule : 0;
688
689	r->last = p;
690}
691
692/* --- roff parser state data management ---------------------------------- */
693
694static void
695roff_free1(struct roff *r)
696{
697	struct tbl_node	*tbl;
698	int		 i;
699
700	while (NULL != (tbl = r->first_tbl)) {
701		r->first_tbl = tbl->next;
702		tbl_free(tbl);
703	}
704	r->first_tbl = r->last_tbl = r->tbl = NULL;
705
706	if (r->last_eqn != NULL)
707		eqn_free(r->last_eqn);
708	r->last_eqn = r->eqn = NULL;
709
710	while (r->last)
711		roffnode_pop(r);
712
713	free (r->rstack);
714	r->rstack = NULL;
715	r->rstacksz = 0;
716	r->rstackpos = -1;
717
718	roff_freereg(r->regtab);
719	r->regtab = NULL;
720
721	roff_freestr(r->strtab);
722	roff_freestr(r->rentab);
723	roff_freestr(r->xmbtab);
724	r->strtab = r->rentab = r->xmbtab = NULL;
725
726	if (r->xtab)
727		for (i = 0; i < 128; i++)
728			free(r->xtab[i].p);
729	free(r->xtab);
730	r->xtab = NULL;
731}
732
733void
734roff_reset(struct roff *r)
735{
736	roff_free1(r);
737	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
738	r->control = '\0';
739	r->escape = '\\';
740	roffce_lines = 0;
741	roffce_node = NULL;
742	roffit_lines = 0;
743	roffit_macro = NULL;
744}
745
/*
 * Destroy a roff parser allocated with roff_alloc():
 * release the per-document state, the request table,
 * and finally the parser object itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	roffhash_free(r->reqtab);
	free(r);
}
753
754struct roff *
755roff_alloc(struct mparse *parse, int options)
756{
757	struct roff	*r;
758
759	r = mandoc_calloc(1, sizeof(struct roff));
760	r->parse = parse;
761	r->reqtab = roffhash_alloc(0, ROFF_USERDEF);
762	r->options = options;
763	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
764	r->rstackpos = -1;
765	r->escape = '\\';
766	return r;
767}
768
769/* --- syntax tree state data management ---------------------------------- */
770
771static void
772roff_man_free1(struct roff_man *man)
773{
774
775	if (man->first != NULL)
776		roff_node_delete(man, man->first);
777	free(man->meta.msec);
778	free(man->meta.vol);
779	free(man->meta.os);
780	free(man->meta.arch);
781	free(man->meta.title);
782	free(man->meta.name);
783	free(man->meta.date);
784}
785
786static void
787roff_man_alloc1(struct roff_man *man)
788{
789
790	memset(&man->meta, 0, sizeof(man->meta));
791	man->first = mandoc_calloc(1, sizeof(*man->first));
792	man->first->type = ROFFT_ROOT;
793	man->last = man->first;
794	man->last_es = NULL;
795	man->flags = 0;
796	man->macroset = MACROSET_NONE;
797	man->lastsec = man->lastnamed = SEC_NONE;
798	man->next = ROFF_NEXT_CHILD;
799}
800
/*
 * Discard the syntax tree and meta data held by a roff_man object
 * and reinitialise it, such that it can be used for another document.
 */
void
roff_man_reset(struct roff_man *man)
{

	roff_man_free1(man);
	roff_man_alloc1(man);
}
808
/*
 * Destroy a roff_man object allocated with roff_man_alloc():
 * release everything it owns, then the object itself.
 */
void
roff_man_free(struct roff_man *man)
{

	roff_man_free1(man);
	free(man);
}
816
817struct roff_man *
818roff_man_alloc(struct roff *roff, struct mparse *parse,
819	const char *os_s, int quick)
820{
821	struct roff_man *man;
822
823	man = mandoc_calloc(1, sizeof(*man));
824	man->parse = parse;
825	man->roff = roff;
826	man->os_s = os_s;
827	man->quick = quick;
828	roff_man_alloc1(man);
829	roff->man = man;
830	return man;
831}
832
833/* --- syntax tree handling ----------------------------------------------- */
834
835struct roff_node *
836roff_node_alloc(struct roff_man *man, int line, int pos,
837	enum roff_type type, int tok)
838{
839	struct roff_node	*n;
840
841	n = mandoc_calloc(1, sizeof(*n));
842	n->line = line;
843	n->pos = pos;
844	n->tok = tok;
845	n->type = type;
846	n->sec = man->lastsec;
847
848	if (man->flags & MDOC_SYNOPSIS)
849		n->flags |= NODE_SYNPRETTY;
850	else
851		n->flags &= ~NODE_SYNPRETTY;
852	if (man->flags & MDOC_NEWLINE)
853		n->flags |= NODE_LINE;
854	man->flags &= ~MDOC_NEWLINE;
855
856	return n;
857}
858
859void
860roff_node_append(struct roff_man *man, struct roff_node *n)
861{
862
863	switch (man->next) {
864	case ROFF_NEXT_SIBLING:
865		if (man->last->next != NULL) {
866			n->next = man->last->next;
867			man->last->next->prev = n;
868		} else
869			man->last->parent->last = n;
870		man->last->next = n;
871		n->prev = man->last;
872		n->parent = man->last->parent;
873		break;
874	case ROFF_NEXT_CHILD:
875		if (man->last->child != NULL) {
876			n->next = man->last->child;
877			man->last->child->prev = n;
878		} else
879			man->last->last = n;
880		man->last->child = n;
881		n->parent = man->last;
882		break;
883	default:
884		abort();
885	}
886	man->last = n;
887
888	switch (n->type) {
889	case ROFFT_HEAD:
890		n->parent->head = n;
891		break;
892	case ROFFT_BODY:
893		if (n->end != ENDBODY_NOT)
894			return;
895		n->parent->body = n;
896		break;
897	case ROFFT_TAIL:
898		n->parent->tail = n;
899		break;
900	default:
901		return;
902	}
903
904	/*
905	 * Copy over the normalised-data pointer of our parent.  Not
906	 * everybody has one, but copying a null pointer is fine.
907	 */
908
909	n->norm = n->parent->norm;
910	assert(n->parent->type == ROFFT_BLOCK);
911}
912
913void
914roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
915{
916	struct roff_node	*n;
917
918	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
919	n->string = roff_strdup(man->roff, word);
920	roff_node_append(man, n);
921	n->flags |= NODE_VALID | NODE_ENDED;
922	man->next = ROFF_NEXT_SIBLING;
923}
924
925void
926roff_word_append(struct roff_man *man, const char *word)
927{
928	struct roff_node	*n;
929	char			*addstr, *newstr;
930
931	n = man->last;
932	addstr = roff_strdup(man->roff, word);
933	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
934	free(addstr);
935	free(n->string);
936	n->string = newstr;
937	man->next = ROFF_NEXT_SIBLING;
938}
939
940void
941roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
942{
943	struct roff_node	*n;
944
945	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
946	roff_node_append(man, n);
947	man->next = ROFF_NEXT_CHILD;
948}
949
950struct roff_node *
951roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
952{
953	struct roff_node	*n;
954
955	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
956	roff_node_append(man, n);
957	man->next = ROFF_NEXT_CHILD;
958	return n;
959}
960
961struct roff_node *
962roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
963{
964	struct roff_node	*n;
965
966	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
967	roff_node_append(man, n);
968	man->next = ROFF_NEXT_CHILD;
969	return n;
970}
971
972struct roff_node *
973roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
974{
975	struct roff_node	*n;
976
977	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
978	roff_node_append(man, n);
979	man->next = ROFF_NEXT_CHILD;
980	return n;
981}
982
983static void
984roff_addtbl(struct roff_man *man, struct tbl_node *tbl)
985{
986	struct roff_node	*n;
987	const struct tbl_span	*span;
988
989	if (man->macroset == MACROSET_MAN)
990		man_breakscope(man, ROFF_TS);
991	while ((span = tbl_span(tbl)) != NULL) {
992		n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE);
993		n->span = span;
994		roff_node_append(man, n);
995		n->flags |= NODE_VALID | NODE_ENDED;
996		man->next = ROFF_NEXT_SIBLING;
997	}
998}
999
1000void
1001roff_node_unlink(struct roff_man *man, struct roff_node *n)
1002{
1003
1004	/* Adjust siblings. */
1005
1006	if (n->prev)
1007		n->prev->next = n->next;
1008	if (n->next)
1009		n->next->prev = n->prev;
1010
1011	/* Adjust parent. */
1012
1013	if (n->parent != NULL) {
1014		if (n->parent->child == n)
1015			n->parent->child = n->next;
1016		if (n->parent->last == n)
1017			n->parent->last = n->prev;
1018	}
1019
1020	/* Adjust parse point. */
1021
1022	if (man == NULL)
1023		return;
1024	if (man->last == n) {
1025		if (n->prev == NULL) {
1026			man->last = n->parent;
1027			man->next = ROFF_NEXT_CHILD;
1028		} else {
1029			man->last = n->prev;
1030			man->next = ROFF_NEXT_SIBLING;
1031		}
1032	}
1033	if (man->first == n)
1034		man->first = NULL;
1035}
1036
1037void
1038roff_node_free(struct roff_node *n)
1039{
1040
1041	if (n->args != NULL)
1042		mdoc_argv_free(n->args);
1043	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1044		free(n->norm);
1045	if (n->eqn != NULL)
1046		eqn_box_free(n->eqn);
1047	free(n->string);
1048	free(n);
1049}
1050
1051void
1052roff_node_delete(struct roff_man *man, struct roff_node *n)
1053{
1054
1055	while (n->child != NULL)
1056		roff_node_delete(man, n->child);
1057	roff_node_unlink(man, n);
1058	roff_node_free(n);
1059}
1060
1061void
1062deroff(char **dest, const struct roff_node *n)
1063{
1064	char	*cp;
1065	size_t	 sz;
1066
1067	if (n->type != ROFFT_TEXT) {
1068		for (n = n->child; n != NULL; n = n->next)
1069			deroff(dest, n);
1070		return;
1071	}
1072
1073	/* Skip leading whitespace. */
1074
1075	for (cp = n->string; *cp != '\0'; cp++) {
1076		if (cp[0] == '\\' && cp[1] != '\0' &&
1077		    strchr(" %&0^|~", cp[1]) != NULL)
1078			cp++;
1079		else if ( ! isspace((unsigned char)*cp))
1080			break;
1081	}
1082
1083	/* Skip trailing backslash. */
1084
1085	sz = strlen(cp);
1086	if (sz > 0 && cp[sz - 1] == '\\')
1087		sz--;
1088
1089	/* Skip trailing whitespace. */
1090
1091	for (; sz; sz--)
1092		if ( ! isspace((unsigned char)cp[sz-1]))
1093			break;
1094
1095	/* Skip empty strings. */
1096
1097	if (sz == 0)
1098		return;
1099
1100	if (*dest == NULL) {
1101		*dest = mandoc_strndup(cp, sz);
1102		return;
1103	}
1104
1105	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1106	free(*dest);
1107	*dest = cp;
1108}
1109
1110/* --- main functions of the roff parser ---------------------------------- */
1111
/*
 * In the current line, expand escape sequences that tend to get
 * used in numerical expressions and conditional requests.
 * Also check the syntax of the remaining escape sequences.
 *
 * Processing starts at offset pos in buf->buf; ln is the input line
 * number used for messages.  The buffer may be replaced by a newly
 * allocated one, in which case buf->buf and buf->sz are updated.
 *
 * A first, forward pass looks for a comment (escape character
 * followed by '"' or '#'), inspects it for RCS ids, and truncates
 * the line in front of it.  A second, backward pass handles the
 * escape sequences: \*, \B, \n, and \w are replaced by their
 * values, all others are merely checked with mandoc_escape().
 *
 * Returns ROFF_CONT in the normal case, ROFF_APPEND if the line
 * ends with an unescaped escape character and no end of input
 * (newline) was seen, or ROFF_IGN if more than EXPAND_LIMIT
 * expansions were needed or the line would grow beyond SHRT_MAX.
 */
static enum rofferr
roff_res(struct roff *r, struct buf *buf, int ln, int pos)
{
	char		 ubuf[24]; /* buffer to print the number */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	enum mandoc_esc	 esc;	/* type of the escape sequence */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != r->escape || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/*
		 * Only accept the keyword if it is not followed by
		 * another alphanumeric character and if a closing
		 * dollar sign follows somewhere on the line.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, r->parse,
				    ln, stesc + 1 - buf->buf, stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/*
		 * Let cp point to the last character of the line and
		 * move stesc back to the escape character starting
		 * the comment.
		 */
		cp = strchr(stesc--, '\0') - 1;
		if (*cp == '\n') {
			done = 1;
			cp--;
		}
		if (*cp == ' ' || *cp == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL, r->parse,
			    ln, cp - buf->buf, NULL);
		/* Also drop the blanks right in front of the comment. */
		while (stesc > start && stesc[-1] == ' ')
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process, stesc points to the final NUL. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	expand_count = 0;
	while (stesc >= start) {

		/* Search backwards for the next backslash. */

		if (*stesc != r->escape) {
			/*
			 * A backslash that is not the current escape
			 * character is replaced by the sequence "\e".
			 * Both start and stesc have to be moved over
			 * to the newly allocated buffer.
			 */
			if (*stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}
			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		/*
		 * Afterwards, cp points just in front of the run of
		 * escape characters ending at stesc, such that
		 * stesc - cp is the length of that run.
		 */
		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Even run: turn all of it into backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/*
			 * Unescaped escape character at the end of the
			 * line: delete it and ask for more input
			 * unless the end of the input was seen.
			 */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		switch (*cp) {
		case '*':
			res = NULL;
			break;
		case 'B':
		case 'w':
			/* The character after the letter delimits the arg. */
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			res = ubuf;
			break;
		default:
			/* Not expanded here, only check the syntax. */
			esc = mandoc_escape(&cp, &stnam, &inaml);
			if (esc == ESCAPE_ERROR ||
			    (esc == ESCAPE_SPECIAL &&
			     mchars_spec2cp(stnam, inaml) < 0))
				mandoc_vmsg(MANDOCERR_ESC_BAD,
				    r->parse, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* Skip the letter and the opening delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		/* A maxl of zero means that the name ends at term. */
		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(stesc - buf->buf), stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || stesc[1] != 'w') {
				naml++;
				continue;
			}
			/*
			 * Inside the argument of \w, an escape sequence
			 * of one of the following types counts as one
			 * character, all others count as nothing.
			 */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (stesc[1]) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);
			}
			break;
		case 'B':
			/*
			 * Yield "1" if the complete argument up to the
			 * closing delimiter is a valid numeric
			 * expression, "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* The result is 24 for each character counted. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			mandoc_vmsg(MANDOCERR_STR_UNDEF,
			    r->parse, ln, (int)(stesc - buf->buf),
			    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue at the end of the text just inserted, such
		 * that escape sequences contained in it are found by
		 * the backward search, too.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1388
1389/*
1390 * Process text streams.
1391 */
1392static enum rofferr
1393roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1394{
1395	size_t		 sz;
1396	const char	*start;
1397	char		*p;
1398	int		 isz;
1399	enum mandoc_esc	 esc;
1400
1401	/* Spring the input line trap. */
1402
1403	if (roffit_lines == 1) {
1404		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1405		free(buf->buf);
1406		buf->buf = p;
1407		buf->sz = isz + 1;
1408		*offs = 0;
1409		free(roffit_macro);
1410		roffit_lines = 0;
1411		return ROFF_REPARSE;
1412	} else if (roffit_lines > 1)
1413		--roffit_lines;
1414
1415	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1416		if (roffce_lines < 1) {
1417			r->man->last = roffce_node;
1418			r->man->next = ROFF_NEXT_SIBLING;
1419			roffce_lines = 0;
1420			roffce_node = NULL;
1421		} else
1422			roffce_lines--;
1423	}
1424
1425	/* Convert all breakable hyphens into ASCII_HYPH. */
1426
1427	start = p = buf->buf + pos;
1428
1429	while (*p != '\0') {
1430		sz = strcspn(p, "-\\");
1431		p += sz;
1432
1433		if (*p == '\0')
1434			break;
1435
1436		if (*p == '\\') {
1437			/* Skip over escapes. */
1438			p++;
1439			esc = mandoc_escape((const char **)&p, NULL, NULL);
1440			if (esc == ESCAPE_ERROR)
1441				break;
1442			while (*p == '-')
1443				p++;
1444			continue;
1445		} else if (p == start) {
1446			p++;
1447			continue;
1448		}
1449
1450		if (isalpha((unsigned char)p[-1]) &&
1451		    isalpha((unsigned char)p[1]))
1452			*p = ASCII_HYPH;
1453		p++;
1454	}
1455	return ROFF_CONT;
1456}
1457
/*
 * Process one input line with the roff parser.
 * The line is in buf and starts at offset *offs; ln is the line
 * number used for messages.  The steps are tried in this order:
 * in-line equation delimiters, escape sequence expansion with
 * roff_res(), the text handler of an open roff block, open
 * equations, open tables, plain text lines, the sub-handler of
 * an open roff block, and finally the handler of the request or
 * user-defined macro starting the line.
 * Returns ROFF_CONT when no request or user-defined macro was found
 * on a control line, and otherwise the code of whichever step
 * finished the line.
 */
enum rofferr
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	enum rofferr	 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_res(r, buf, ln, pos);
	if (e == ROFF_IGN || e == ROFF_APPEND)
		return e;
	assert(e == ROFF_CONT);

	/* On a control line, this also moves pos forward. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if (e == ROFF_IGN)
			return e;
		assert(e == ROFF_CONT);
	}
	/* Inside an equation, every line except .EN is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return ROFF_IGN;
	}
	/* Text lines and empty control lines go to an open table. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs);

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
		    ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
		    ln, pos, buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For names that are no requests, skip the name
		 * and the blanks after it and pass the remainder
		 * of the line to the table parser.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1582
1583void
1584roff_endparse(struct roff *r)
1585{
1586	if (r->last != NULL)
1587		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1588		    r->last->line, r->last->col,
1589		    roff_name[r->last->tok]);
1590
1591	if (r->eqn != NULL) {
1592		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1593		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1594		eqn_parse(r->eqn);
1595		r->eqn = NULL;
1596	}
1597
1598	if (r->tbl != NULL) {
1599		mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1600		    r->tbl->line, r->tbl->pos, "TS");
1601		tbl_end(r->tbl);
1602		r->tbl = NULL;
1603	}
1604}
1605
1606/*
1607 * Parse a roff node's type from the input buffer.  This must be in the
1608 * form of ".foo xxx" in the usual way.
1609 */
1610static enum roff_tok
1611roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1612{
1613	char		*cp;
1614	const char	*mac;
1615	size_t		 maclen;
1616	int		 deftype;
1617	enum roff_tok	 t;
1618
1619	cp = buf + *pos;
1620
1621	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1622		return TOKEN_NONE;
1623
1624	mac = cp;
1625	maclen = roff_getname(r, &cp, ln, ppos);
1626
1627	deftype = ROFFDEF_USER | ROFFDEF_REN;
1628	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1629	switch (deftype) {
1630	case ROFFDEF_USER:
1631		t = ROFF_USERDEF;
1632		break;
1633	case ROFFDEF_REN:
1634		t = ROFF_RENAMED;
1635		break;
1636	default:
1637		t = roffhash_find(r->reqtab, mac, maclen);
1638		break;
1639	}
1640	if (t != TOKEN_NONE)
1641		*pos = cp - buf;
1642	return t;
1643}
1644
1645/* --- handling of request blocks ----------------------------------------- */
1646
1647static enum rofferr
1648roff_cblock(ROFF_ARGS)
1649{
1650
1651	/*
1652	 * A block-close `..' should only be invoked as a child of an
1653	 * ignore macro, otherwise raise a warning and just ignore it.
1654	 */
1655
1656	if (r->last == NULL) {
1657		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1658		    ln, ppos, "..");
1659		return ROFF_IGN;
1660	}
1661
1662	switch (r->last->tok) {
1663	case ROFF_am:
1664		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1665	case ROFF_ami:
1666	case ROFF_de:
1667		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1668	case ROFF_dei:
1669	case ROFF_ig:
1670		break;
1671	default:
1672		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1673		    ln, ppos, "..");
1674		return ROFF_IGN;
1675	}
1676
1677	if (buf->buf[pos] != '\0')
1678		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1679		    ".. %s", buf->buf + pos);
1680
1681	roffnode_pop(r);
1682	roffnode_cleanscope(r);
1683	return ROFF_IGN;
1684
1685}
1686
1687static void
1688roffnode_cleanscope(struct roff *r)
1689{
1690
1691	while (r->last) {
1692		if (--r->last->endspan != 0)
1693			break;
1694		roffnode_pop(r);
1695	}
1696}
1697
1698static void
1699roff_ccond(struct roff *r, int ln, int ppos)
1700{
1701
1702	if (NULL == r->last) {
1703		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1704		    ln, ppos, "\\}");
1705		return;
1706	}
1707
1708	switch (r->last->tok) {
1709	case ROFF_el:
1710	case ROFF_ie:
1711	case ROFF_if:
1712		break;
1713	default:
1714		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1715		    ln, ppos, "\\}");
1716		return;
1717	}
1718
1719	if (r->last->endspan > -1) {
1720		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1721		    ln, ppos, "\\}");
1722		return;
1723	}
1724
1725	roffnode_pop(r);
1726	roffnode_cleanscope(r);
1727	return;
1728}
1729
1730static enum rofferr
1731roff_block(ROFF_ARGS)
1732{
1733	const char	*name, *value;
1734	char		*call, *cp, *iname, *rname;
1735	size_t		 csz, namesz, rsz;
1736	int		 deftype;
1737
1738	/* Ignore groff compatibility mode for now. */
1739
1740	if (tok == ROFF_de1)
1741		tok = ROFF_de;
1742	else if (tok == ROFF_dei1)
1743		tok = ROFF_dei;
1744	else if (tok == ROFF_am1)
1745		tok = ROFF_am;
1746	else if (tok == ROFF_ami1)
1747		tok = ROFF_ami;
1748
1749	/* Parse the macro name argument. */
1750
1751	cp = buf->buf + pos;
1752	if (tok == ROFF_ig) {
1753		iname = NULL;
1754		namesz = 0;
1755	} else {
1756		iname = cp;
1757		namesz = roff_getname(r, &cp, ln, ppos);
1758		iname[namesz] = '\0';
1759	}
1760
1761	/* Resolve the macro name argument if it is indirect. */
1762
1763	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1764		deftype = ROFFDEF_USER;
1765		name = roff_getstrn(r, iname, namesz, &deftype);
1766		if (name == NULL) {
1767			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1768			    r->parse, ln, (int)(iname - buf->buf),
1769			    "%.*s", (int)namesz, iname);
1770			namesz = 0;
1771		} else
1772			namesz = strlen(name);
1773	} else
1774		name = iname;
1775
1776	if (namesz == 0 && tok != ROFF_ig) {
1777		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1778		    ln, ppos, roff_name[tok]);
1779		return ROFF_IGN;
1780	}
1781
1782	roffnode_push(r, tok, name, ln, ppos);
1783
1784	/*
1785	 * At the beginning of a `de' macro, clear the existing string
1786	 * with the same name, if there is one.  New content will be
1787	 * appended from roff_block_text() in multiline mode.
1788	 */
1789
1790	if (tok == ROFF_de || tok == ROFF_dei) {
1791		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1792		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1793	} else if (tok == ROFF_am || tok == ROFF_ami) {
1794		deftype = ROFFDEF_ANY;
1795		value = roff_getstrn(r, iname, namesz, &deftype);
1796		switch (deftype) {  /* Before appending, ... */
1797		case ROFFDEF_PRE: /* copy predefined to user-defined. */
1798			roff_setstrn(&r->strtab, name, namesz,
1799			    value, strlen(value), 0);
1800			break;
1801		case ROFFDEF_REN: /* call original standard macro. */
1802			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1803			    (int)strlen(value), value);
1804			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1805			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
1806			free(call);
1807			break;
1808		case ROFFDEF_STD:  /* rename and call standard macro. */
1809			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
1810			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
1811			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
1812			    (int)rsz, rname);
1813			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
1814			free(call);
1815			free(rname);
1816			break;
1817		default:
1818			break;
1819		}
1820	}
1821
1822	if (*cp == '\0')
1823		return ROFF_IGN;
1824
1825	/* Get the custom end marker. */
1826
1827	iname = cp;
1828	namesz = roff_getname(r, &cp, ln, ppos);
1829
1830	/* Resolve the end marker if it is indirect. */
1831
1832	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1833		deftype = ROFFDEF_USER;
1834		name = roff_getstrn(r, iname, namesz, &deftype);
1835		if (name == NULL) {
1836			mandoc_vmsg(MANDOCERR_STR_UNDEF,
1837			    r->parse, ln, (int)(iname - buf->buf),
1838			    "%.*s", (int)namesz, iname);
1839			namesz = 0;
1840		} else
1841			namesz = strlen(name);
1842	} else
1843		name = iname;
1844
1845	if (namesz)
1846		r->last->end = mandoc_strndup(name, namesz);
1847
1848	if (*cp != '\0')
1849		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1850		    ln, pos, ".%s ... %s", roff_name[tok], cp);
1851
1852	return ROFF_IGN;
1853}
1854
1855static enum rofferr
1856roff_block_sub(ROFF_ARGS)
1857{
1858	enum roff_tok	t;
1859	int		i, j;
1860
1861	/*
1862	 * First check whether a custom macro exists at this level.  If
1863	 * it does, then check against it.  This is some of groff's
1864	 * stranger behaviours.  If we encountered a custom end-scope
1865	 * tag and that tag also happens to be a "real" macro, then we
1866	 * need to try interpreting it again as a real macro.  If it's
1867	 * not, then return ignore.  Else continue.
1868	 */
1869
1870	if (r->last->end) {
1871		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1872			if (buf->buf[i] != r->last->end[j])
1873				break;
1874
1875		if (r->last->end[j] == '\0' &&
1876		    (buf->buf[i] == '\0' ||
1877		     buf->buf[i] == ' ' ||
1878		     buf->buf[i] == '\t')) {
1879			roffnode_pop(r);
1880			roffnode_cleanscope(r);
1881
1882			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1883				i++;
1884
1885			pos = i;
1886			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1887			    TOKEN_NONE)
1888				return ROFF_RERUN;
1889			return ROFF_IGN;
1890		}
1891	}
1892
1893	/*
1894	 * If we have no custom end-query or lookup failed, then try
1895	 * pulling it out of the hashtable.
1896	 */
1897
1898	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1899
1900	if (t != ROFF_cblock) {
1901		if (tok != ROFF_ig)
1902			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1903		return ROFF_IGN;
1904	}
1905
1906	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
1907}
1908
1909static enum rofferr
1910roff_block_text(ROFF_ARGS)
1911{
1912
1913	if (tok != ROFF_ig)
1914		roff_setstr(r, r->last->name, buf->buf + pos, 2);
1915
1916	return ROFF_IGN;
1917}
1918
1919static enum rofferr
1920roff_cond_sub(ROFF_ARGS)
1921{
1922	enum roff_tok	 t;
1923	char		*ep;
1924	int		 rr;
1925
1926	rr = r->last->rule;
1927	roffnode_cleanscope(r);
1928
1929	/*
1930	 * If `\}' occurs on a macro line without a preceding macro,
1931	 * drop the line completely.
1932	 */
1933
1934	ep = buf->buf + pos;
1935	if (ep[0] == '\\' && ep[1] == '}')
1936		rr = 0;
1937
1938	/* Always check for the closing delimiter `\}'. */
1939
1940	while ((ep = strchr(ep, '\\')) != NULL) {
1941		switch (ep[1]) {
1942		case '}':
1943			memmove(ep, ep + 2, strlen(ep + 2) + 1);
1944			roff_ccond(r, ln, ep - buf->buf);
1945			break;
1946		case '\0':
1947			++ep;
1948			break;
1949		default:
1950			ep += 2;
1951			break;
1952		}
1953	}
1954
1955	/*
1956	 * Fully handle known macros when they are structurally
1957	 * required or when the conditional evaluated to true.
1958	 */
1959
1960	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1961	return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT)
1962	    ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr
1963	    ? ROFF_CONT : ROFF_IGN;
1964}
1965
1966static enum rofferr
1967roff_cond_text(ROFF_ARGS)
1968{
1969	char		*ep;
1970	int		 rr;
1971
1972	rr = r->last->rule;
1973	roffnode_cleanscope(r);
1974
1975	ep = buf->buf + pos;
1976	while ((ep = strchr(ep, '\\')) != NULL) {
1977		if (*(++ep) == '}') {
1978			*ep = '&';
1979			roff_ccond(r, ln, ep - buf->buf - 1);
1980		}
1981		if (*ep != '\0')
1982			++ep;
1983	}
1984	return rr ? ROFF_CONT : ROFF_IGN;
1985}
1986
1987/* --- handling of numeric and conditional expressions -------------------- */
1988
1989/*
1990 * Parse a single signed integer number.  Stop at the first non-digit.
1991 * If there is at least one digit, return success and advance the
1992 * parse point, else return failure and let the parse point unchanged.
1993 * Ignore overflows, treat them just like the C language.
1994 */
1995static int
1996roff_getnum(const char *v, int *pos, int *res, int flags)
1997{
1998	int	 myres, scaled, n, p;
1999
2000	if (NULL == res)
2001		res = &myres;
2002
2003	p = *pos;
2004	n = v[p] == '-';
2005	if (n || v[p] == '+')
2006		p++;
2007
2008	if (flags & ROFFNUM_WHITE)
2009		while (isspace((unsigned char)v[p]))
2010			p++;
2011
2012	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2013		*res = 10 * *res + v[p] - '0';
2014	if (p == *pos + n)
2015		return 0;
2016
2017	if (n)
2018		*res = -*res;
2019
2020	/* Each number may be followed by one optional scaling unit. */
2021
2022	switch (v[p]) {
2023	case 'f':
2024		scaled = *res * 65536;
2025		break;
2026	case 'i':
2027		scaled = *res * 240;
2028		break;
2029	case 'c':
2030		scaled = *res * 240 / 2.54;
2031		break;
2032	case 'v':
2033	case 'P':
2034		scaled = *res * 40;
2035		break;
2036	case 'm':
2037	case 'n':
2038		scaled = *res * 24;
2039		break;
2040	case 'p':
2041		scaled = *res * 10 / 3;
2042		break;
2043	case 'u':
2044		scaled = *res;
2045		break;
2046	case 'M':
2047		scaled = *res * 6 / 25;
2048		break;
2049	default:
2050		scaled = *res;
2051		p--;
2052		break;
2053	}
2054	if (flags & ROFFNUM_SCALE)
2055		*res = scaled;
2056
2057	*pos = p + 1;
2058	return 1;
2059}
2060
/*
 * Evaluate a string comparison condition.
 * The first character, at v[*pos], is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If there is not even a first delimiter because the cursor
 * already is at the end of the line, fail and leave it alone.
 * Returns 1 for a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/*
	 * Without a delimiter, there is nothing to compare,
	 * and s1 + 1 would point beyond the end of the string.
	 */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			/* Still look for where the condition ends. */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return match;
}
2103
2104/*
2105 * Evaluate an optionally negated single character, numerical,
2106 * or string condition.
2107 */
2108static int
2109roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2110{
2111	char	*cp, *name;
2112	size_t	 sz;
2113	int	 deftype, number, savepos, istrue, wanttrue;
2114
2115	if ('!' == v[*pos]) {
2116		wanttrue = 0;
2117		(*pos)++;
2118	} else
2119		wanttrue = 1;
2120
2121	switch (v[*pos]) {
2122	case '\0':
2123		return 0;
2124	case 'n':
2125	case 'o':
2126		(*pos)++;
2127		return wanttrue;
2128	case 'c':
2129	case 'e':
2130	case 't':
2131	case 'v':
2132		(*pos)++;
2133		return !wanttrue;
2134	case 'd':
2135	case 'r':
2136		cp = v + *pos + 1;
2137		while (*cp == ' ')
2138			cp++;
2139		name = cp;
2140		sz = roff_getname(r, &cp, ln, cp - v);
2141		if (sz == 0)
2142			istrue = 0;
2143		else if (v[*pos] == 'r')
2144			istrue = roff_hasregn(r, name, sz);
2145		else {
2146			deftype = ROFFDEF_ANY;
2147		        roff_getstrn(r, name, sz, &deftype);
2148			istrue = !!deftype;
2149		}
2150		*pos = cp - v;
2151		return istrue == wanttrue;
2152	default:
2153		break;
2154	}
2155
2156	savepos = *pos;
2157	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2158		return (number > 0) == wanttrue;
2159	else if (*pos == savepos)
2160		return roff_evalstrcond(v, pos) == wanttrue;
2161	else
2162		return 0;
2163}
2164
2165static enum rofferr
2166roff_line_ignore(ROFF_ARGS)
2167{
2168
2169	return ROFF_IGN;
2170}
2171
2172static enum rofferr
2173roff_insec(ROFF_ARGS)
2174{
2175
2176	mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
2177	    ln, ppos, roff_name[tok]);
2178	return ROFF_IGN;
2179}
2180
2181static enum rofferr
2182roff_unsupp(ROFF_ARGS)
2183{
2184
2185	mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
2186	    ln, ppos, roff_name[tok]);
2187	return ROFF_IGN;
2188}
2189
2190static enum rofferr
2191roff_cond(ROFF_ARGS)
2192{
2193
2194	roffnode_push(r, tok, NULL, ln, ppos);
2195
2196	/*
2197	 * An `.el' has no conditional body: it will consume the value
2198	 * of the current rstack entry set in prior `ie' calls or
2199	 * defaults to DENY.
2200	 *
2201	 * If we're not an `el', however, then evaluate the conditional.
2202	 */
2203
2204	r->last->rule = tok == ROFF_el ?
2205	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2206	    roff_evalcond(r, ln, buf->buf, &pos);
2207
2208	/*
2209	 * An if-else will put the NEGATION of the current evaluated
2210	 * conditional into the stack of rules.
2211	 */
2212
2213	if (tok == ROFF_ie) {
2214		if (r->rstackpos + 1 == r->rstacksz) {
2215			r->rstacksz += 16;
2216			r->rstack = mandoc_reallocarray(r->rstack,
2217			    r->rstacksz, sizeof(int));
2218		}
2219		r->rstack[++r->rstackpos] = !r->last->rule;
2220	}
2221
2222	/* If the parent has false as its rule, then so do we. */
2223
2224	if (r->last->parent && !r->last->parent->rule)
2225		r->last->rule = 0;
2226
2227	/*
2228	 * Determine scope.
2229	 * If there is nothing on the line after the conditional,
2230	 * not even whitespace, use next-line scope.
2231	 */
2232
2233	if (buf->buf[pos] == '\0') {
2234		r->last->endspan = 2;
2235		goto out;
2236	}
2237
2238	while (buf->buf[pos] == ' ')
2239		pos++;
2240
2241	/* An opening brace requests multiline scope. */
2242
2243	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2244		r->last->endspan = -1;
2245		pos += 2;
2246		while (buf->buf[pos] == ' ')
2247			pos++;
2248		goto out;
2249	}
2250
2251	/*
2252	 * Anything else following the conditional causes
2253	 * single-line scope.  Warn if the scope contains
2254	 * nothing but trailing whitespace.
2255	 */
2256
2257	if (buf->buf[pos] == '\0')
2258		mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
2259		    ln, ppos, roff_name[tok]);
2260
2261	r->last->endspan = 1;
2262
2263out:
2264	*offs = pos;
2265	return ROFF_RERUN;
2266}
2267
2268static enum rofferr
2269roff_ds(ROFF_ARGS)
2270{
2271	char		*string;
2272	const char	*name;
2273	size_t		 namesz;
2274
2275	/* Ignore groff compatibility mode for now. */
2276
2277	if (tok == ROFF_ds1)
2278		tok = ROFF_ds;
2279	else if (tok == ROFF_as1)
2280		tok = ROFF_as;
2281
2282	/*
2283	 * The first word is the name of the string.
2284	 * If it is empty or terminated by an escape sequence,
2285	 * abort the `ds' request without defining anything.
2286	 */
2287
2288	name = string = buf->buf + pos;
2289	if (*name == '\0')
2290		return ROFF_IGN;
2291
2292	namesz = roff_getname(r, &string, ln, pos);
2293	if (name[namesz] == '\\')
2294		return ROFF_IGN;
2295
2296	/* Read past the initial double-quote, if any. */
2297	if (*string == '"')
2298		string++;
2299
2300	/* The rest is the value. */
2301	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2302	    ROFF_as == tok);
2303	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2304	return ROFF_IGN;
2305}
2306
2307/*
2308 * Parse a single operator, one or two characters long.
2309 * If the operator is recognized, return success and advance the
2310 * parse point, else return failure and let the parse point unchanged.
2311 */
2312static int
2313roff_getop(const char *v, int *pos, char *res)
2314{
2315
2316	*res = v[*pos];
2317
2318	switch (*res) {
2319	case '+':
2320	case '-':
2321	case '*':
2322	case '/':
2323	case '%':
2324	case '&':
2325	case ':':
2326		break;
2327	case '<':
2328		switch (v[*pos + 1]) {
2329		case '=':
2330			*res = 'l';
2331			(*pos)++;
2332			break;
2333		case '>':
2334			*res = '!';
2335			(*pos)++;
2336			break;
2337		case '?':
2338			*res = 'i';
2339			(*pos)++;
2340			break;
2341		default:
2342			break;
2343		}
2344		break;
2345	case '>':
2346		switch (v[*pos + 1]) {
2347		case '=':
2348			*res = 'g';
2349			(*pos)++;
2350			break;
2351		case '?':
2352			*res = 'a';
2353			(*pos)++;
2354			break;
2355		default:
2356			break;
2357		}
2358		break;
2359	case '=':
2360		if ('=' == v[*pos + 1])
2361			(*pos)++;
2362		break;
2363	default:
2364		return 0;
2365	}
2366	(*pos)++;
2367
2368	return *res;
2369}
2370
2371/*
2372 * Evaluate either a parenthesized numeric expression
2373 * or a single signed integer number.
2374 */
2375static int
2376roff_evalpar(struct roff *r, int ln,
2377	const char *v, int *pos, int *res, int flags)
2378{
2379
2380	if ('(' != v[*pos])
2381		return roff_getnum(v, pos, res, flags);
2382
2383	(*pos)++;
2384	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2385		return 0;
2386
2387	/*
2388	 * Omission of the closing parenthesis
2389	 * is an error in validation mode,
2390	 * but ignored in evaluation mode.
2391	 */
2392
2393	if (')' == v[*pos])
2394		(*pos)++;
2395	else if (NULL == res)
2396		return 0;
2397
2398	return 1;
2399}
2400
2401/*
2402 * Evaluate a complete numeric expression.
2403 * Proceed left to right, there is no concept of precedence.
2404 */
2405static int
2406roff_evalnum(struct roff *r, int ln, const char *v,
2407	int *pos, int *res, int flags)
2408{
2409	int		 mypos, operand2;
2410	char		 operator;
2411
2412	if (NULL == pos) {
2413		mypos = 0;
2414		pos = &mypos;
2415	}
2416
2417	if (flags & ROFFNUM_WHITE)
2418		while (isspace((unsigned char)v[*pos]))
2419			(*pos)++;
2420
2421	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2422		return 0;
2423
2424	while (1) {
2425		if (flags & ROFFNUM_WHITE)
2426			while (isspace((unsigned char)v[*pos]))
2427				(*pos)++;
2428
2429		if ( ! roff_getop(v, pos, &operator))
2430			break;
2431
2432		if (flags & ROFFNUM_WHITE)
2433			while (isspace((unsigned char)v[*pos]))
2434				(*pos)++;
2435
2436		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2437			return 0;
2438
2439		if (flags & ROFFNUM_WHITE)
2440			while (isspace((unsigned char)v[*pos]))
2441				(*pos)++;
2442
2443		if (NULL == res)
2444			continue;
2445
2446		switch (operator) {
2447		case '+':
2448			*res += operand2;
2449			break;
2450		case '-':
2451			*res -= operand2;
2452			break;
2453		case '*':
2454			*res *= operand2;
2455			break;
2456		case '/':
2457			if (operand2 == 0) {
2458				mandoc_msg(MANDOCERR_DIVZERO,
2459					r->parse, ln, *pos, v);
2460				*res = 0;
2461				break;
2462			}
2463			*res /= operand2;
2464			break;
2465		case '%':
2466			if (operand2 == 0) {
2467				mandoc_msg(MANDOCERR_DIVZERO,
2468					r->parse, ln, *pos, v);
2469				*res = 0;
2470				break;
2471			}
2472			*res %= operand2;
2473			break;
2474		case '<':
2475			*res = *res < operand2;
2476			break;
2477		case '>':
2478			*res = *res > operand2;
2479			break;
2480		case 'l':
2481			*res = *res <= operand2;
2482			break;
2483		case 'g':
2484			*res = *res >= operand2;
2485			break;
2486		case '=':
2487			*res = *res == operand2;
2488			break;
2489		case '!':
2490			*res = *res != operand2;
2491			break;
2492		case '&':
2493			*res = *res && operand2;
2494			break;
2495		case ':':
2496			*res = *res || operand2;
2497			break;
2498		case 'i':
2499			if (operand2 < *res)
2500				*res = operand2;
2501			break;
2502		case 'a':
2503			if (operand2 > *res)
2504				*res = operand2;
2505			break;
2506		default:
2507			abort();
2508		}
2509	}
2510	return 1;
2511}
2512
2513/* --- register management ------------------------------------------------ */
2514
2515void
2516roff_setreg(struct roff *r, const char *name, int val, char sign)
2517{
2518	struct roffreg	*reg;
2519
2520	/* Search for an existing register with the same name. */
2521	reg = r->regtab;
2522
2523	while (reg && strcmp(name, reg->key.p))
2524		reg = reg->next;
2525
2526	if (NULL == reg) {
2527		/* Create a new register. */
2528		reg = mandoc_malloc(sizeof(struct roffreg));
2529		reg->key.p = mandoc_strdup(name);
2530		reg->key.sz = strlen(name);
2531		reg->val = 0;
2532		reg->next = r->regtab;
2533		r->regtab = reg;
2534	}
2535
2536	if ('+' == sign)
2537		reg->val += val;
2538	else if ('-' == sign)
2539		reg->val -= val;
2540	else
2541		reg->val = val;
2542}
2543
2544/*
2545 * Handle some predefined read-only number registers.
2546 * For now, return -1 if the requested register is not predefined;
2547 * in case a predefined read-only register having the value -1
2548 * were to turn up, another special value would have to be chosen.
2549 */
2550static int
2551roff_getregro(const struct roff *r, const char *name)
2552{
2553
2554	switch (*name) {
2555	case '$':  /* Number of arguments of the last macro evaluated. */
2556		return r->argc;
2557	case 'A':  /* ASCII approximation mode is always off. */
2558		return 0;
2559	case 'g':  /* Groff compatibility mode is always on. */
2560		return 1;
2561	case 'H':  /* Fixed horizontal resolution. */
2562		return 24;
2563	case 'j':  /* Always adjust left margin only. */
2564		return 0;
2565	case 'T':  /* Some output device is always defined. */
2566		return 1;
2567	case 'V':  /* Fixed vertical resolution. */
2568		return 40;
2569	default:
2570		return -1;
2571	}
2572}
2573
2574int
2575roff_getreg(const struct roff *r, const char *name)
2576{
2577	struct roffreg	*reg;
2578	int		 val;
2579
2580	if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2581		val = roff_getregro(r, name + 1);
2582		if (-1 != val)
2583			return val;
2584	}
2585
2586	for (reg = r->regtab; reg; reg = reg->next)
2587		if (0 == strcmp(name, reg->key.p))
2588			return reg->val;
2589
2590	return 0;
2591}
2592
2593static int
2594roff_getregn(const struct roff *r, const char *name, size_t len)
2595{
2596	struct roffreg	*reg;
2597	int		 val;
2598
2599	if ('.' == name[0] && 2 == len) {
2600		val = roff_getregro(r, name + 1);
2601		if (-1 != val)
2602			return val;
2603	}
2604
2605	for (reg = r->regtab; reg; reg = reg->next)
2606		if (len == reg->key.sz &&
2607		    0 == strncmp(name, reg->key.p, len))
2608			return reg->val;
2609
2610	return 0;
2611}
2612
2613static int
2614roff_hasregn(const struct roff *r, const char *name, size_t len)
2615{
2616	struct roffreg	*reg;
2617	int		 val;
2618
2619	if ('.' == name[0] && 2 == len) {
2620		val = roff_getregro(r, name + 1);
2621		if (-1 != val)
2622			return 1;
2623	}
2624
2625	for (reg = r->regtab; reg; reg = reg->next)
2626		if (len == reg->key.sz &&
2627		    0 == strncmp(name, reg->key.p, len))
2628			return 1;
2629
2630	return 0;
2631}
2632
2633static void
2634roff_freereg(struct roffreg *reg)
2635{
2636	struct roffreg	*old_reg;
2637
2638	while (NULL != reg) {
2639		free(reg->key.p);
2640		old_reg = reg;
2641		reg = reg->next;
2642		free(old_reg);
2643	}
2644}
2645
2646static enum rofferr
2647roff_nr(ROFF_ARGS)
2648{
2649	char		*key, *val;
2650	size_t		 keysz;
2651	int		 iv;
2652	char		 sign;
2653
2654	key = val = buf->buf + pos;
2655	if (*key == '\0')
2656		return ROFF_IGN;
2657
2658	keysz = roff_getname(r, &val, ln, pos);
2659	if (key[keysz] == '\\')
2660		return ROFF_IGN;
2661	key[keysz] = '\0';
2662
2663	sign = *val;
2664	if (sign == '+' || sign == '-')
2665		val++;
2666
2667	if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2668		roff_setreg(r, key, iv, sign);
2669
2670	return ROFF_IGN;
2671}
2672
2673static enum rofferr
2674roff_rr(ROFF_ARGS)
2675{
2676	struct roffreg	*reg, **prev;
2677	char		*name, *cp;
2678	size_t		 namesz;
2679
2680	name = cp = buf->buf + pos;
2681	if (*name == '\0')
2682		return ROFF_IGN;
2683	namesz = roff_getname(r, &cp, ln, pos);
2684	name[namesz] = '\0';
2685
2686	prev = &r->regtab;
2687	while (1) {
2688		reg = *prev;
2689		if (reg == NULL || !strcmp(name, reg->key.p))
2690			break;
2691		prev = &reg->next;
2692	}
2693	if (reg != NULL) {
2694		*prev = reg->next;
2695		free(reg->key.p);
2696		free(reg);
2697	}
2698	return ROFF_IGN;
2699}
2700
2701/* --- handler functions for roff requests -------------------------------- */
2702
2703static enum rofferr
2704roff_rm(ROFF_ARGS)
2705{
2706	const char	 *name;
2707	char		 *cp;
2708	size_t		  namesz;
2709
2710	cp = buf->buf + pos;
2711	while (*cp != '\0') {
2712		name = cp;
2713		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2714		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2715		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2716		if (name[namesz] == '\\')
2717			break;
2718	}
2719	return ROFF_IGN;
2720}
2721
2722static enum rofferr
2723roff_it(ROFF_ARGS)
2724{
2725	int		 iv;
2726
2727	/* Parse the number of lines. */
2728
2729	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2730		mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2731		    ln, ppos, buf->buf + 1);
2732		return ROFF_IGN;
2733	}
2734
2735	while (isspace((unsigned char)buf->buf[pos]))
2736		pos++;
2737
2738	/*
2739	 * Arm the input line trap.
2740	 * Special-casing "an-trap" is an ugly workaround to cope
2741	 * with DocBook stupidly fiddling with man(7) internals.
2742	 */
2743
2744	roffit_lines = iv;
2745	roffit_macro = mandoc_strdup(iv != 1 ||
2746	    strcmp(buf->buf + pos, "an-trap") ?
2747	    buf->buf + pos : "br");
2748	return ROFF_IGN;
2749}
2750
2751static enum rofferr
2752roff_Dd(ROFF_ARGS)
2753{
2754	int		 mask;
2755	enum roff_tok	 t, te;
2756
2757	switch (tok) {
2758	case ROFF_Dd:
2759		tok = MDOC_Dd;
2760		te = MDOC_MAX;
2761		if (r->format == 0)
2762			r->format = MPARSE_MDOC;
2763		mask = MPARSE_MDOC | MPARSE_QUICK;
2764		break;
2765	case ROFF_TH:
2766		tok = MAN_TH;
2767		te = MAN_MAX;
2768		if (r->format == 0)
2769			r->format = MPARSE_MAN;
2770		mask = MPARSE_QUICK;
2771		break;
2772	default:
2773		abort();
2774	}
2775	if ((r->options & mask) == 0)
2776		for (t = tok; t < te; t++)
2777			roff_setstr(r, roff_name[t], NULL, 0);
2778	return ROFF_CONT;
2779}
2780
2781static enum rofferr
2782roff_TE(ROFF_ARGS)
2783{
2784	if (r->tbl == NULL) {
2785		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2786		    ln, ppos, "TE");
2787		return ROFF_IGN;
2788	}
2789	if (tbl_end(r->tbl) == 0) {
2790		r->tbl = NULL;
2791		free(buf->buf);
2792		buf->buf = mandoc_strdup(".sp");
2793		buf->sz = 4;
2794		return ROFF_REPARSE;
2795	}
2796	r->tbl = NULL;
2797	return ROFF_IGN;
2798}
2799
2800static enum rofferr
2801roff_T_(ROFF_ARGS)
2802{
2803
2804	if (NULL == r->tbl)
2805		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2806		    ln, ppos, "T&");
2807	else
2808		tbl_restart(ln, ppos, r->tbl);
2809
2810	return ROFF_IGN;
2811}
2812
2813/*
2814 * Handle in-line equation delimiters.
2815 */
2816static enum rofferr
2817roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2818{
2819	char		*cp1, *cp2;
2820	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2821
2822	/*
2823	 * Outside equations, look for an opening delimiter.
2824	 * If we are inside an equation, we already know it is
2825	 * in-line, or this function wouldn't have been called;
2826	 * so look for a closing delimiter.
2827	 */
2828
2829	cp1 = buf->buf + pos;
2830	cp2 = strchr(cp1, r->eqn == NULL ?
2831	    r->last_eqn->odelim : r->last_eqn->cdelim);
2832	if (cp2 == NULL)
2833		return ROFF_CONT;
2834
2835	*cp2++ = '\0';
2836	bef_pr = bef_nl = aft_nl = aft_pr = "";
2837
2838	/* Handle preceding text, protecting whitespace. */
2839
2840	if (*buf->buf != '\0') {
2841		if (r->eqn == NULL)
2842			bef_pr = "\\&";
2843		bef_nl = "\n";
2844	}
2845
2846	/*
2847	 * Prepare replacing the delimiter with an equation macro
2848	 * and drop leading white space from the equation.
2849	 */
2850
2851	if (r->eqn == NULL) {
2852		while (*cp2 == ' ')
2853			cp2++;
2854		mac = ".EQ";
2855	} else
2856		mac = ".EN";
2857
2858	/* Handle following text, protecting whitespace. */
2859
2860	if (*cp2 != '\0') {
2861		aft_nl = "\n";
2862		if (r->eqn != NULL)
2863			aft_pr = "\\&";
2864	}
2865
2866	/* Do the actual replacement. */
2867
2868	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2869	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2870	free(buf->buf);
2871	buf->buf = cp1;
2872
2873	/* Toggle the in-line state of the eqn subsystem. */
2874
2875	r->eqn_inline = r->eqn == NULL;
2876	return ROFF_REPARSE;
2877}
2878
2879static enum rofferr
2880roff_EQ(ROFF_ARGS)
2881{
2882	struct roff_node	*n;
2883
2884	if (r->man->macroset == MACROSET_MAN)
2885		man_breakscope(r->man, ROFF_EQ);
2886	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
2887	if (ln > r->man->last->line)
2888		n->flags |= NODE_LINE;
2889	n->eqn = mandoc_calloc(1, sizeof(*n->eqn));
2890	n->eqn->expectargs = UINT_MAX;
2891	roff_node_append(r->man, n);
2892	r->man->next = ROFF_NEXT_SIBLING;
2893
2894	assert(r->eqn == NULL);
2895	if (r->last_eqn == NULL)
2896		r->last_eqn = eqn_alloc(r->parse);
2897	else
2898		eqn_reset(r->last_eqn);
2899	r->eqn = r->last_eqn;
2900	r->eqn->node = n;
2901
2902	if (buf->buf[pos] != '\0')
2903		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2904		    ".EQ %s", buf->buf + pos);
2905
2906	return ROFF_IGN;
2907}
2908
2909static enum rofferr
2910roff_EN(ROFF_ARGS)
2911{
2912	if (r->eqn != NULL) {
2913		eqn_parse(r->eqn);
2914		r->eqn = NULL;
2915	} else
2916		mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2917	if (buf->buf[pos] != '\0')
2918		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2919		    "EN %s", buf->buf + pos);
2920	return ROFF_IGN;
2921}
2922
2923static enum rofferr
2924roff_TS(ROFF_ARGS)
2925{
2926	if (r->tbl != NULL) {
2927		mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2928		    ln, ppos, "TS breaks TS");
2929		tbl_end(r->tbl);
2930	}
2931	r->tbl = tbl_alloc(ppos, ln, r->parse);
2932	if (r->last_tbl)
2933		r->last_tbl->next = r->tbl;
2934	else
2935		r->first_tbl = r->tbl;
2936	r->last_tbl = r->tbl;
2937	return ROFF_IGN;
2938}
2939
2940static enum rofferr
2941roff_onearg(ROFF_ARGS)
2942{
2943	struct roff_node	*n;
2944	char			*cp;
2945	int			 npos;
2946
2947	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
2948	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
2949	     tok == ROFF_ti))
2950		man_breakscope(r->man, tok);
2951
2952	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
2953		r->man->last = roffce_node;
2954		r->man->next = ROFF_NEXT_SIBLING;
2955	}
2956
2957	roff_elem_alloc(r->man, ln, ppos, tok);
2958	n = r->man->last;
2959
2960	cp = buf->buf + pos;
2961	if (*cp != '\0') {
2962		while (*cp != '\0' && *cp != ' ')
2963			cp++;
2964		while (*cp == ' ')
2965			*cp++ = '\0';
2966		if (*cp != '\0')
2967			mandoc_vmsg(MANDOCERR_ARG_EXCESS,
2968			    r->parse, ln, cp - buf->buf,
2969			    "%s ... %s", roff_name[tok], cp);
2970		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
2971	}
2972
2973	if (tok == ROFF_ce || tok == ROFF_rj) {
2974		if (r->man->last->type == ROFFT_ELEM) {
2975			roff_word_alloc(r->man, ln, pos, "1");
2976			r->man->last->flags |= NODE_NOSRC;
2977		}
2978		npos = 0;
2979		if (roff_evalnum(r, ln, r->man->last->string, &npos,
2980		    &roffce_lines, 0) == 0) {
2981			mandoc_vmsg(MANDOCERR_CE_NONUM,
2982			    r->parse, ln, pos, "ce %s", buf->buf + pos);
2983			roffce_lines = 1;
2984		}
2985		if (roffce_lines < 1) {
2986			r->man->last = r->man->last->parent;
2987			roffce_node = NULL;
2988			roffce_lines = 0;
2989		} else
2990			roffce_node = r->man->last->parent;
2991	} else {
2992		n->flags |= NODE_VALID | NODE_ENDED;
2993		r->man->last = n;
2994	}
2995	n->flags |= NODE_LINE;
2996	r->man->next = ROFF_NEXT_SIBLING;
2997	return ROFF_IGN;
2998}
2999
3000static enum rofferr
3001roff_manyarg(ROFF_ARGS)
3002{
3003	struct roff_node	*n;
3004	char			*sp, *ep;
3005
3006	roff_elem_alloc(r->man, ln, ppos, tok);
3007	n = r->man->last;
3008
3009	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3010		while (*ep != '\0' && *ep != ' ')
3011			ep++;
3012		while (*ep == ' ')
3013			*ep++ = '\0';
3014		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3015	}
3016
3017	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3018	r->man->last = n;
3019	r->man->next = ROFF_NEXT_SIBLING;
3020	return ROFF_IGN;
3021}
3022
3023static enum rofferr
3024roff_als(ROFF_ARGS)
3025{
3026	char		*oldn, *newn, *end, *value;
3027	size_t		 oldsz, newsz, valsz;
3028
3029	newn = oldn = buf->buf + pos;
3030	if (*newn == '\0')
3031		return ROFF_IGN;
3032
3033	newsz = roff_getname(r, &oldn, ln, pos);
3034	if (newn[newsz] == '\\' || *oldn == '\0')
3035		return ROFF_IGN;
3036
3037	end = oldn;
3038	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3039	if (oldsz == 0)
3040		return ROFF_IGN;
3041
3042	valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n",
3043	    (int)oldsz, oldn);
3044	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3045	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3046	free(value);
3047	return ROFF_IGN;
3048}
3049
3050static enum rofferr
3051roff_br(ROFF_ARGS)
3052{
3053	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3054		man_breakscope(r->man, ROFF_br);
3055	roff_elem_alloc(r->man, ln, ppos, ROFF_br);
3056	if (buf->buf[pos] != '\0')
3057		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
3058		    "%s %s", roff_name[tok], buf->buf + pos);
3059	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3060	r->man->next = ROFF_NEXT_SIBLING;
3061	return ROFF_IGN;
3062}
3063
3064static enum rofferr
3065roff_cc(ROFF_ARGS)
3066{
3067	const char	*p;
3068
3069	p = buf->buf + pos;
3070
3071	if (*p == '\0' || (r->control = *p++) == '.')
3072		r->control = '\0';
3073
3074	if (*p != '\0')
3075		mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3076		    ln, p - buf->buf, "cc ... %s", p);
3077
3078	return ROFF_IGN;
3079}
3080
3081static enum rofferr
3082roff_ec(ROFF_ARGS)
3083{
3084	const char	*p;
3085
3086	p = buf->buf + pos;
3087	if (*p == '\0')
3088		r->escape = '\\';
3089	else {
3090		r->escape = *p;
3091		if (*++p != '\0')
3092			mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
3093			    ln, p - buf->buf, "ec ... %s", p);
3094	}
3095	return ROFF_IGN;
3096}
3097
3098static enum rofferr
3099roff_eo(ROFF_ARGS)
3100{
3101	r->escape = '\0';
3102	if (buf->buf[pos] != '\0')
3103		mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse,
3104		    ln, pos, "eo %s", buf->buf + pos);
3105	return ROFF_IGN;
3106}
3107
/*
 * Handle the `tr' request: record character translations.
 * The argument is read as a sequence of pairs, each consisting
 * of a source and a replacement, where either may be a single
 * character or an escape sequence.
 * Sources that are escape sequences are stored in r->xmbtab;
 * single-character sources are stored in the array r->xtab,
 * indexed by the character, which is allocated on first use.
 * Always returns ROFF_IGN; an invalid escape sequence aborts
 * processing of the rest of the line.
 */
static enum rofferr
roff_tr(ROFF_ARGS)
{
	const char	*p, *first, *second;
	size_t		 fsz, ssz;
	enum mandoc_esc	 esc;

	p = buf->buf + pos;

	if (*p == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
		return ROFF_IGN;
	}

	while (*p != '\0') {
		fsz = ssz = 1;

		/* Read the source, which may be an escape sequence. */

		first = p++;
		if (*first == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(p - buf->buf), first);
				return ROFF_IGN;
			}
			fsz = (size_t)(p - first);
		}

		/*
		 * Read the replacement in the same way.
		 * If the line ends after the source, warn and
		 * use a single blank as the replacement; in that
		 * case, step back onto the NUL to end the loop.
		 */

		second = p++;
		if (*second == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
				    ln, (int)(p - buf->buf), second);
				return ROFF_IGN;
			}
			ssz = (size_t)(p - second);
		} else if (*second == '\0') {
			mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
			    ln, first - buf->buf, "tr %s", first);
			second = " ";
			p--;
		}

		/* Escape sequences as sources go to the key-value list. */

		if (fsz > 1) {
			roff_setstrn(&r->xmbtab, first, fsz,
			    second, ssz, 0);
			continue;
		}

		/*
		 * Single characters index the 128-entry table.
		 * NOTE(review): the index is not range-checked here;
		 * this assumes the input contains no bytes outside
		 * 0..127 at this point - confirm against the reader.
		 */

		if (r->xtab == NULL)
			r->xtab = mandoc_calloc(128,
			    sizeof(struct roffstr));

		free(r->xtab[(int)*first].p);
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
		r->xtab[(int)*first].sz = ssz;
	}

	return ROFF_IGN;
}
3169
3170static enum rofferr
3171roff_rn(ROFF_ARGS)
3172{
3173	const char	*value;
3174	char		*oldn, *newn, *end;
3175	size_t		 oldsz, newsz;
3176	int		 deftype;
3177
3178	oldn = newn = buf->buf + pos;
3179	if (*oldn == '\0')
3180		return ROFF_IGN;
3181
3182	oldsz = roff_getname(r, &newn, ln, pos);
3183	if (oldn[oldsz] == '\\' || *newn == '\0')
3184		return ROFF_IGN;
3185
3186	end = newn;
3187	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3188	if (newsz == 0)
3189		return ROFF_IGN;
3190
3191	deftype = ROFFDEF_ANY;
3192	value = roff_getstrn(r, oldn, oldsz, &deftype);
3193	switch (deftype) {
3194	case ROFFDEF_USER:
3195		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3196		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3197		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3198		break;
3199	case ROFFDEF_PRE:
3200		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3201		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3202		break;
3203	case ROFFDEF_REN:
3204		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3205		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3206		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3207		break;
3208	case ROFFDEF_STD:
3209		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3210		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3211		break;
3212	default:
3213		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3214		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3215		break;
3216	}
3217	return ROFF_IGN;
3218}
3219
3220static enum rofferr
3221roff_so(ROFF_ARGS)
3222{
3223	char *name, *cp;
3224
3225	name = buf->buf + pos;
3226	mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
3227
3228	/*
3229	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3230	 * opening anything that's not in our cwd or anything beneath
3231	 * it.  Thus, explicitly disallow traversing up the file-system
3232	 * or using absolute paths.
3233	 */
3234
3235	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3236		mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
3237		    ".so %s", name);
3238		buf->sz = mandoc_asprintf(&cp,
3239		    ".sp\nSee the file %s.\n.sp", name) + 1;
3240		free(buf->buf);
3241		buf->buf = cp;
3242		*offs = 0;
3243		return ROFF_REPARSE;
3244	}
3245
3246	*offs = pos;
3247	return ROFF_SO;
3248}
3249
3250/* --- user defined strings and macros ------------------------------------ */
3251
/*
 * Call a user-defined macro: replace the input line by a copy
 * of r->current_string with all argument references expanded.
 * \$1 to \$9 are replaced by the respective argument, \$* by
 * all arguments separated by blanks; double quotes contained
 * in arguments are replaced by \(dq.
 * Sets r->argc to the number of arguments found on the line.
 * Returns ROFF_REPARSE if the expanded text ends with a newline,
 * ROFF_APPEND otherwise, or ROFF_IGN if the expansion limit is
 * exceeded.
 */
static enum rofferr
roff_userdef(ROFF_ARGS)
{
	const char	 *arg[16], *ap;
	char		 *cp, *n1, *n2;
	int		  expand_count, i, ib, ie;
	size_t		  asz, rsz;

	/*
	 * Collect pointers to macro argument strings
	 * and NUL-terminate them.
	 * Slots beyond the last argument point to empty strings.
	 */

	r->argc = 0;
	cp = buf->buf + pos;
	for (i = 0; i < 16; i++) {
		if (*cp == '\0')
			arg[i] = "";
		else {
			arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos);
			r->argc = i + 1;
		}
	}

	/*
	 * Expand macro arguments.
	 * n1 is the start of the working copy of the macro body,
	 * cp the parse position in it, and n2 the end of the most
	 * recently inserted argument text.
	 */

	buf->sz = strlen(r->current_string) + 1;
	n1 = n2 = cp = mandoc_malloc(buf->sz);
	memcpy(n1, r->current_string, buf->sz);
	expand_count = 0;
	while (*cp != '\0') {

		/* Scan ahead for the next argument invocation. */

		if (*cp++ != '\\')
			continue;
		if (*cp++ != '$')
			continue;
		if (*cp == '*') {  /* \\$* inserts all arguments */
			ib = 0;
			ie = r->argc - 1;
		} else {  /* \\$1 .. \\$9 insert one argument */
			ib = ie = *cp - '1';
			if (ib < 0 || ib > 8)
				continue;
		}
		/* Back up to the backslash starting the invocation. */
		cp -= 2;

		/*
		 * Prevent infinite recursion.
		 * The counter is reset whenever parsing has advanced
		 * beyond the text inserted by the previous expansion.
		 */

		if (cp >= n2)
			expand_count = 1;
		else if (++expand_count > EXPAND_LIMIT) {
			mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
			    ln, (int)(cp - n1), NULL);
			free(buf->buf);
			buf->buf = n1;
			return ROFF_IGN;
		}

		/*
		 * Determine the size of the expanded argument,
		 * taking escaping of quotes into account.
		 * Each quote grows from one byte to the four
		 * bytes of \(dq, hence the three extra bytes.
		 */

		asz = ie > ib ? ie - ib : 0;  /* for blanks */
		for (i = ib; i <= ie; i++) {
			for (ap = arg[i]; *ap != '\0'; ap++) {
				asz++;
				if (*ap == '"')
					asz += 3;
			}
		}
		/*
		 * The invocation itself is three bytes long, so the
		 * buffer needs resizing unless the expansion happens
		 * to have exactly that size.
		 */
		if (asz != 3) {

			/*
			 * Determine the size of the rest of the
			 * unexpanded macro, including the NUL.
			 */

			rsz = buf->sz - (cp - n1) - 3;

			/*
			 * When shrinking, move before
			 * releasing the storage.
			 */

			if (asz < 3)
				memmove(cp + asz, cp + 3, rsz);

			/*
			 * Resize the storage for the macro
			 * and readjust the parse pointer.
			 */

			buf->sz += asz - 3;
			n2 = mandoc_realloc(n1, buf->sz);
			cp = n2 + (cp - n1);
			n1 = n2;

			/*
			 * When growing, make room
			 * for the expanded argument.
			 */

			if (asz > 3)
				memmove(cp + asz, cp + 3, rsz);
		}

		/*
		 * Copy the expanded argument, escaping quotes.
		 * The parse pointer stays at the start of the inserted
		 * text, so the expansion is scanned again for further
		 * argument invocations.
		 */

		n2 = cp;
		for (i = ib; i <= ie; i++) {
			for (ap = arg[i]; *ap != '\0'; ap++) {
				if (*ap == '"') {
					memcpy(n2, "\\(dq", 4);
					n2 += 4;
				} else
					*n2++ = *ap;
			}
			if (i < ie)
				*n2++ = ' ';
		}
	}

	/*
	 * Replace the macro invocation
	 * by the expanded macro.
	 */

	free(buf->buf);
	buf->buf = n1;
	*offs = 0;

	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
	   ROFF_REPARSE : ROFF_APPEND;
}
3393
3394/*
3395 * Calling a high-level macro that was renamed with .rn.
3396 * r->current_string has already been set up by roff_parse().
3397 */
3398static enum rofferr
3399roff_renamed(ROFF_ARGS)
3400{
3401	char	*nbuf;
3402
3403	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3404	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3405	free(buf->buf);
3406	buf->buf = nbuf;
3407	return ROFF_CONT;
3408}
3409
/*
 * Parse a name, for example of a string, macro, or register,
 * starting at *cpp.  The name ends at the first blank, at the
 * end of the input, or at an escape sequence other than an
 * escaped backslash.
 * Returns the length of the name in bytes.
 * On return, *cpp points past the name and any blanks following it.
 * This function does not modify the buffer; callers needing
 * a NUL-terminated name have to terminate it themselves.
 * ln and pos are only used for reporting messages.
 */
static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
	char	 *name, *cp;
	size_t	  namesz;

	name = *cpp;
	if ('\0' == *name)
		return 0;

	/* Read until end of name and remember its length. */
	for (cp = name; 1; cp++) {
		if ('\0' == *cp || ' ' == *cp) {
			namesz = cp - name;
			break;
		}
		if ('\\' != *cp)
			continue;
		/* The name might end right before this backslash. */
		namesz = cp - name;
		/* A brace escape ends the name without any message. */
		if ('{' == cp[1] || '}' == cp[1])
			break;
		cp++;
		/* An escaped backslash is part of the name. */
		if ('\\' == *cp)
			continue;
		/* Any other escape ends the name; report and skip it. */
		mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
		    "%.*s", (int)(cp - name + 1), name);
		mandoc_escape((const char **)&cp, NULL, NULL);
		break;
	}

	/* Read past spaces. */
	while (' ' == *cp)
		cp++;

	*cpp = cp;
	return namesz;
}
3447
3448/*
3449 * Store *string into the user-defined string called *name.
3450 * To clear an existing entry, call with (*r, *name, NULL, 0).
3451 * append == 0: replace mode
3452 * append == 1: single-line append mode
3453 * append == 2: multiline append mode, append '\n' after each call
3454 */
3455static void
3456roff_setstr(struct roff *r, const char *name, const char *string,
3457	int append)
3458{
3459	size_t	 namesz;
3460
3461	namesz = strlen(name);
3462	roff_setstrn(&r->strtab, name, namesz, string,
3463	    string ? strlen(string) : 0, append);
3464	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3465}
3466
3467static void
3468roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3469		const char *string, size_t stringsz, int append)
3470{
3471	struct roffkv	*n;
3472	char		*c;
3473	int		 i;
3474	size_t		 oldch, newch;
3475
3476	/* Search for an existing string with the same name. */
3477	n = *r;
3478
3479	while (n && (namesz != n->key.sz ||
3480			strncmp(n->key.p, name, namesz)))
3481		n = n->next;
3482
3483	if (NULL == n) {
3484		/* Create a new string table entry. */
3485		n = mandoc_malloc(sizeof(struct roffkv));
3486		n->key.p = mandoc_strndup(name, namesz);
3487		n->key.sz = namesz;
3488		n->val.p = NULL;
3489		n->val.sz = 0;
3490		n->next = *r;
3491		*r = n;
3492	} else if (0 == append) {
3493		free(n->val.p);
3494		n->val.p = NULL;
3495		n->val.sz = 0;
3496	}
3497
3498	if (NULL == string)
3499		return;
3500
3501	/*
3502	 * One additional byte for the '\n' in multiline mode,
3503	 * and one for the terminating '\0'.
3504	 */
3505	newch = stringsz + (1 < append ? 2u : 1u);
3506
3507	if (NULL == n->val.p) {
3508		n->val.p = mandoc_malloc(newch);
3509		*n->val.p = '\0';
3510		oldch = 0;
3511	} else {
3512		oldch = n->val.sz;
3513		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3514	}
3515
3516	/* Skip existing content in the destination buffer. */
3517	c = n->val.p + (int)oldch;
3518
3519	/* Append new content to the destination buffer. */
3520	i = 0;
3521	while (i < (int)stringsz) {
3522		/*
3523		 * Rudimentary roff copy mode:
3524		 * Handle escaped backslashes.
3525		 */
3526		if ('\\' == string[i] && '\\' == string[i + 1])
3527			i++;
3528		*c++ = string[i++];
3529	}
3530
3531	/* Append terminating bytes. */
3532	if (1 < append)
3533		*c++ = '\n';
3534
3535	*c = '\0';
3536	n->val.sz = (int)(c - n->val.p);
3537}
3538
3539static const char *
3540roff_getstrn(const struct roff *r, const char *name, size_t len,
3541    int *deftype)
3542{
3543	const struct roffkv	*n;
3544	int			 i;
3545	enum roff_tok		 tok;
3546
3547	if (*deftype & ROFFDEF_USER) {
3548		for (n = r->strtab; n != NULL; n = n->next) {
3549			if (strncmp(name, n->key.p, len) == 0 &&
3550			    n->key.p[len] == '\0' &&
3551			    n->val.p != NULL) {
3552				*deftype = ROFFDEF_USER;
3553				return n->val.p;
3554			}
3555		}
3556	}
3557	if (*deftype & ROFFDEF_PRE) {
3558		for (i = 0; i < PREDEFS_MAX; i++) {
3559			if (strncmp(name, predefs[i].name, len) == 0 &&
3560			    predefs[i].name[len] == '\0') {
3561				*deftype = ROFFDEF_PRE;
3562				return predefs[i].str;
3563			}
3564		}
3565	}
3566	if (*deftype & ROFFDEF_REN) {
3567		for (n = r->rentab; n != NULL; n = n->next) {
3568			if (strncmp(name, n->key.p, len) == 0 &&
3569			    n->key.p[len] == '\0' &&
3570			    n->val.p != NULL) {
3571				*deftype = ROFFDEF_REN;
3572				return n->val.p;
3573			}
3574		}
3575	}
3576	if (*deftype & ROFFDEF_STD) {
3577		if (r->man->macroset != MACROSET_MAN) {
3578			for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
3579				if (strncmp(name, roff_name[tok], len) == 0 &&
3580				    roff_name[tok][len] == '\0') {
3581					*deftype = ROFFDEF_STD;
3582					return NULL;
3583				}
3584			}
3585		}
3586		if (r->man->macroset != MACROSET_MDOC) {
3587			for (tok = MAN_TH; tok < MAN_MAX; tok++) {
3588				if (strncmp(name, roff_name[tok], len) == 0 &&
3589				    roff_name[tok][len] == '\0') {
3590					*deftype = ROFFDEF_STD;
3591					return NULL;
3592				}
3593			}
3594		}
3595	}
3596	*deftype = 0;
3597	return NULL;
3598}
3599
3600static void
3601roff_freestr(struct roffkv *r)
3602{
3603	struct roffkv	 *n, *nn;
3604
3605	for (n = r; n; n = nn) {
3606		free(n->key.p);
3607		free(n->val.p);
3608		nn = n->next;
3609		free(n);
3610	}
3611}
3612
3613/* --- accessors and utility functions ------------------------------------ */
3614
3615/*
3616 * Duplicate an input string, making the appropriate character
3617 * conversations (as stipulated by `tr') along the way.
3618 * Returns a heap-allocated string with all the replacements made.
3619 */
3620char *
3621roff_strdup(const struct roff *r, const char *p)
3622{
3623	const struct roffkv *cp;
3624	char		*res;
3625	const char	*pp;
3626	size_t		 ssz, sz;
3627	enum mandoc_esc	 esc;
3628
3629	if (NULL == r->xmbtab && NULL == r->xtab)
3630		return mandoc_strdup(p);
3631	else if ('\0' == *p)
3632		return mandoc_strdup("");
3633
3634	/*
3635	 * Step through each character looking for term matches
3636	 * (remember that a `tr' can be invoked with an escape, which is
3637	 * a glyph but the escape is multi-character).
3638	 * We only do this if the character hash has been initialised
3639	 * and the string is >0 length.
3640	 */
3641
3642	res = NULL;
3643	ssz = 0;
3644
3645	while ('\0' != *p) {
3646		assert((unsigned int)*p < 128);
3647		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
3648			sz = r->xtab[(int)*p].sz;
3649			res = mandoc_realloc(res, ssz + sz + 1);
3650			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
3651			ssz += sz;
3652			p++;
3653			continue;
3654		} else if ('\\' != *p) {
3655			res = mandoc_realloc(res, ssz + 2);
3656			res[ssz++] = *p++;
3657			continue;
3658		}
3659
3660		/* Search for term matches. */
3661		for (cp = r->xmbtab; cp; cp = cp->next)
3662			if (0 == strncmp(p, cp->key.p, cp->key.sz))
3663				break;
3664
3665		if (NULL != cp) {
3666			/*
3667			 * A match has been found.
3668			 * Append the match to the array and move
3669			 * forward by its keysize.
3670			 */
3671			res = mandoc_realloc(res,
3672			    ssz + cp->val.sz + 1);
3673			memcpy(res + ssz, cp->val.p, cp->val.sz);
3674			ssz += cp->val.sz;
3675			p += (int)cp->key.sz;
3676			continue;
3677		}
3678
3679		/*
3680		 * Handle escapes carefully: we need to copy
3681		 * over just the escape itself, or else we might
3682		 * do replacements within the escape itself.
3683		 * Make sure to pass along the bogus string.
3684		 */
3685		pp = p++;
3686		esc = mandoc_escape(&p, NULL, NULL);
3687		if (ESCAPE_ERROR == esc) {
3688			sz = strlen(pp);
3689			res = mandoc_realloc(res, ssz + sz + 1);
3690			memcpy(res + ssz, pp, sz);
3691			break;
3692		}
3693		/*
3694		 * We bail out on bad escapes.
3695		 * No need to warn: we already did so when
3696		 * roff_res() was called.
3697		 */
3698		sz = (int)(p - pp);
3699		res = mandoc_realloc(res, ssz + sz + 1);
3700		memcpy(res + ssz, pp, sz);
3701		ssz += sz;
3702	}
3703
3704	res[(int)ssz] = '\0';
3705	return res;
3706}
3707
3708int
3709roff_getformat(const struct roff *r)
3710{
3711
3712	return r->format;
3713}
3714
3715/*
3716 * Find out whether a line is a macro line or not.
3717 * If it is, adjust the current position and return one; if it isn't,
3718 * return zero and don't change the current position.
3719 * If the control character has been set with `.cc', then let that grain
3720 * precedence.
3721 * This is slighly contrary to groff, where using the non-breaking
3722 * control character when `cc' has been invoked will cause the
3723 * non-breaking macro contents to be printed verbatim.
3724 */
3725int
3726roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3727{
3728	int		pos;
3729
3730	pos = *ppos;
3731
3732	if (r->control != '\0' && cp[pos] == r->control)
3733		pos++;
3734	else if (r->control != '\0')
3735		return 0;
3736	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3737		pos += 2;
3738	else if ('.' == cp[pos] || '\'' == cp[pos])
3739		pos++;
3740	else
3741		return 0;
3742
3743	while (' ' == cp[pos] || '\t' == cp[pos])
3744		pos++;
3745
3746	*ppos = pos;
3747	return 1;
3748}
3749