1/*	$Id: eqn.c,v 1.83 2018/12/14 06:33:14 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <time.h>
29
30#include "mandoc_aux.h"
31#include "mandoc.h"
32#include "roff.h"
33#include "eqn.h"
34#include "libmandoc.h"
35#include "eqn_parse.h"
36
37#define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
38#define	STRNEQ(p1, sz1, p2, sz2) \
39	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
40
41enum	eqn_tok {
42	EQN_TOK_DYAD = 0,
43	EQN_TOK_VEC,
44	EQN_TOK_UNDER,
45	EQN_TOK_BAR,
46	EQN_TOK_TILDE,
47	EQN_TOK_HAT,
48	EQN_TOK_DOT,
49	EQN_TOK_DOTDOT,
50	EQN_TOK_FWD,
51	EQN_TOK_BACK,
52	EQN_TOK_DOWN,
53	EQN_TOK_UP,
54	EQN_TOK_FAT,
55	EQN_TOK_ROMAN,
56	EQN_TOK_ITALIC,
57	EQN_TOK_BOLD,
58	EQN_TOK_SIZE,
59	EQN_TOK_SUB,
60	EQN_TOK_SUP,
61	EQN_TOK_SQRT,
62	EQN_TOK_OVER,
63	EQN_TOK_FROM,
64	EQN_TOK_TO,
65	EQN_TOK_BRACE_OPEN,
66	EQN_TOK_BRACE_CLOSE,
67	EQN_TOK_GSIZE,
68	EQN_TOK_GFONT,
69	EQN_TOK_MARK,
70	EQN_TOK_LINEUP,
71	EQN_TOK_LEFT,
72	EQN_TOK_RIGHT,
73	EQN_TOK_PILE,
74	EQN_TOK_LPILE,
75	EQN_TOK_RPILE,
76	EQN_TOK_CPILE,
77	EQN_TOK_MATRIX,
78	EQN_TOK_CCOL,
79	EQN_TOK_LCOL,
80	EQN_TOK_RCOL,
81	EQN_TOK_DELIM,
82	EQN_TOK_DEFINE,
83	EQN_TOK_TDEFINE,
84	EQN_TOK_NDEFINE,
85	EQN_TOK_UNDEF,
86	EQN_TOK_ABOVE,
87	EQN_TOK__MAX,
88	EQN_TOK_FUNC,
89	EQN_TOK_QUOTED,
90	EQN_TOK_SYM,
91	EQN_TOK_EOF
92};
93
94static	const char *eqn_toks[EQN_TOK__MAX] = {
95	"dyad", /* EQN_TOK_DYAD */
96	"vec", /* EQN_TOK_VEC */
97	"under", /* EQN_TOK_UNDER */
98	"bar", /* EQN_TOK_BAR */
99	"tilde", /* EQN_TOK_TILDE */
100	"hat", /* EQN_TOK_HAT */
101	"dot", /* EQN_TOK_DOT */
102	"dotdot", /* EQN_TOK_DOTDOT */
103	"fwd", /* EQN_TOK_FWD * */
104	"back", /* EQN_TOK_BACK */
105	"down", /* EQN_TOK_DOWN */
106	"up", /* EQN_TOK_UP */
107	"fat", /* EQN_TOK_FAT */
108	"roman", /* EQN_TOK_ROMAN */
109	"italic", /* EQN_TOK_ITALIC */
110	"bold", /* EQN_TOK_BOLD */
111	"size", /* EQN_TOK_SIZE */
112	"sub", /* EQN_TOK_SUB */
113	"sup", /* EQN_TOK_SUP */
114	"sqrt", /* EQN_TOK_SQRT */
115	"over", /* EQN_TOK_OVER */
116	"from", /* EQN_TOK_FROM */
117	"to", /* EQN_TOK_TO */
118	"{", /* EQN_TOK_BRACE_OPEN */
119	"}", /* EQN_TOK_BRACE_CLOSE */
120	"gsize", /* EQN_TOK_GSIZE */
121	"gfont", /* EQN_TOK_GFONT */
122	"mark", /* EQN_TOK_MARK */
123	"lineup", /* EQN_TOK_LINEUP */
124	"left", /* EQN_TOK_LEFT */
125	"right", /* EQN_TOK_RIGHT */
126	"pile", /* EQN_TOK_PILE */
127	"lpile", /* EQN_TOK_LPILE */
128	"rpile", /* EQN_TOK_RPILE */
129	"cpile", /* EQN_TOK_CPILE */
130	"matrix", /* EQN_TOK_MATRIX */
131	"ccol", /* EQN_TOK_CCOL */
132	"lcol", /* EQN_TOK_LCOL */
133	"rcol", /* EQN_TOK_RCOL */
134	"delim", /* EQN_TOK_DELIM */
135	"define", /* EQN_TOK_DEFINE */
136	"tdefine", /* EQN_TOK_TDEFINE */
137	"ndefine", /* EQN_TOK_NDEFINE */
138	"undef", /* EQN_TOK_UNDEF */
139	"above", /* EQN_TOK_ABOVE */
140};
141
142static	const char *const eqn_func[] = {
143	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
144	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
145	"if",	"lim",	"ln",	"log",	"max",	"min",
146	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
147};
148
149enum	eqn_symt {
150	EQNSYM_alpha = 0,
151	EQNSYM_beta,
152	EQNSYM_chi,
153	EQNSYM_delta,
154	EQNSYM_epsilon,
155	EQNSYM_eta,
156	EQNSYM_gamma,
157	EQNSYM_iota,
158	EQNSYM_kappa,
159	EQNSYM_lambda,
160	EQNSYM_mu,
161	EQNSYM_nu,
162	EQNSYM_omega,
163	EQNSYM_omicron,
164	EQNSYM_phi,
165	EQNSYM_pi,
166	EQNSYM_ps,
167	EQNSYM_rho,
168	EQNSYM_sigma,
169	EQNSYM_tau,
170	EQNSYM_theta,
171	EQNSYM_upsilon,
172	EQNSYM_xi,
173	EQNSYM_zeta,
174	EQNSYM_DELTA,
175	EQNSYM_GAMMA,
176	EQNSYM_LAMBDA,
177	EQNSYM_OMEGA,
178	EQNSYM_PHI,
179	EQNSYM_PI,
180	EQNSYM_PSI,
181	EQNSYM_SIGMA,
182	EQNSYM_THETA,
183	EQNSYM_UPSILON,
184	EQNSYM_XI,
185	EQNSYM_inter,
186	EQNSYM_union,
187	EQNSYM_prod,
188	EQNSYM_int,
189	EQNSYM_sum,
190	EQNSYM_grad,
191	EQNSYM_del,
192	EQNSYM_times,
193	EQNSYM_cdot,
194	EQNSYM_nothing,
195	EQNSYM_approx,
196	EQNSYM_prime,
197	EQNSYM_half,
198	EQNSYM_partial,
199	EQNSYM_inf,
200	EQNSYM_muchgreat,
201	EQNSYM_muchless,
202	EQNSYM_larrow,
203	EQNSYM_rarrow,
204	EQNSYM_pm,
205	EQNSYM_nequal,
206	EQNSYM_equiv,
207	EQNSYM_lessequal,
208	EQNSYM_moreequal,
209	EQNSYM_minus,
210	EQNSYM__MAX
211};
212
213struct	eqnsym {
214	const char	*str;
215	const char	*sym;
216};
217
218static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
219	{ "alpha", "*a" }, /* EQNSYM_alpha */
220	{ "beta", "*b" }, /* EQNSYM_beta */
221	{ "chi", "*x" }, /* EQNSYM_chi */
222	{ "delta", "*d" }, /* EQNSYM_delta */
223	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
224	{ "eta", "*y" }, /* EQNSYM_eta */
225	{ "gamma", "*g" }, /* EQNSYM_gamma */
226	{ "iota", "*i" }, /* EQNSYM_iota */
227	{ "kappa", "*k" }, /* EQNSYM_kappa */
228	{ "lambda", "*l" }, /* EQNSYM_lambda */
229	{ "mu", "*m" }, /* EQNSYM_mu */
230	{ "nu", "*n" }, /* EQNSYM_nu */
231	{ "omega", "*w" }, /* EQNSYM_omega */
232	{ "omicron", "*o" }, /* EQNSYM_omicron */
233	{ "phi", "*f" }, /* EQNSYM_phi */
234	{ "pi", "*p" }, /* EQNSYM_pi */
235	{ "psi", "*q" }, /* EQNSYM_psi */
236	{ "rho", "*r" }, /* EQNSYM_rho */
237	{ "sigma", "*s" }, /* EQNSYM_sigma */
238	{ "tau", "*t" }, /* EQNSYM_tau */
239	{ "theta", "*h" }, /* EQNSYM_theta */
240	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
241	{ "xi", "*c" }, /* EQNSYM_xi */
242	{ "zeta", "*z" }, /* EQNSYM_zeta */
243	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
244	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
245	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
246	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
247	{ "PHI", "*F" }, /* EQNSYM_PHI */
248	{ "PI", "*P" }, /* EQNSYM_PI */
249	{ "PSI", "*Q" }, /* EQNSYM_PSI */
250	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
251	{ "THETA", "*H" }, /* EQNSYM_THETA */
252	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
253	{ "XI", "*C" }, /* EQNSYM_XI */
254	{ "inter", "ca" }, /* EQNSYM_inter */
255	{ "union", "cu" }, /* EQNSYM_union */
256	{ "prod", "product" }, /* EQNSYM_prod */
257	{ "int", "integral" }, /* EQNSYM_int */
258	{ "sum", "sum" }, /* EQNSYM_sum */
259	{ "grad", "gr" }, /* EQNSYM_grad */
260	{ "del", "gr" }, /* EQNSYM_del */
261	{ "times", "mu" }, /* EQNSYM_times */
262	{ "cdot", "pc" }, /* EQNSYM_cdot */
263	{ "nothing", "&" }, /* EQNSYM_nothing */
264	{ "approx", "~~" }, /* EQNSYM_approx */
265	{ "prime", "fm" }, /* EQNSYM_prime */
266	{ "half", "12" }, /* EQNSYM_half */
267	{ "partial", "pd" }, /* EQNSYM_partial */
268	{ "inf", "if" }, /* EQNSYM_inf */
269	{ ">>", ">>" }, /* EQNSYM_muchgreat */
270	{ "<<", "<<" }, /* EQNSYM_muchless */
271	{ "<-", "<-" }, /* EQNSYM_larrow */
272	{ "->", "->" }, /* EQNSYM_rarrow */
273	{ "+-", "+-" }, /* EQNSYM_pm */
274	{ "!=", "!=" }, /* EQNSYM_nequal */
275	{ "==", "==" }, /* EQNSYM_equiv */
276	{ "<=", "<=" }, /* EQNSYM_lessequal */
277	{ ">=", ">=" }, /* EQNSYM_moreequal */
278	{ "-", "mi" }, /* EQNSYM_minus */
279};
280
281enum	parse_mode {
282	MODE_QUOTED,
283	MODE_NOSUB,
284	MODE_SUB,
285	MODE_TOK
286};
287
288struct	eqn_def {
289	char		 *key;
290	size_t		  keysz;
291	char		 *val;
292	size_t		  valsz;
293};
294
295static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
296static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
297				struct eqn_box *);
298static	void		 eqn_def(struct eqn_node *);
299static	struct eqn_def	*eqn_def_find(struct eqn_node *);
300static	void		 eqn_delim(struct eqn_node *);
301static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
302static	void		 eqn_undef(struct eqn_node *);
303
304
305struct eqn_node *
306eqn_alloc(void)
307{
308	struct eqn_node *ep;
309
310	ep = mandoc_calloc(1, sizeof(*ep));
311	ep->gsize = EQN_DEFSIZE;
312	return ep;
313}
314
315void
316eqn_reset(struct eqn_node *ep)
317{
318	free(ep->data);
319	ep->data = ep->start = ep->end = NULL;
320	ep->sz = ep->toksz = 0;
321}
322
323void
324eqn_read(struct eqn_node *ep, const char *p)
325{
326	char		*cp;
327
328	if (ep->data == NULL) {
329		ep->sz = strlen(p);
330		ep->data = mandoc_strdup(p);
331	} else {
332		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
333		free(ep->data);
334		ep->data = cp;
335	}
336	ep->sz += 1;
337}
338
339/*
340 * Find the key "key" of the give size within our eqn-defined values.
341 */
342static struct eqn_def *
343eqn_def_find(struct eqn_node *ep)
344{
345	int		 i;
346
347	for (i = 0; i < (int)ep->defsz; i++)
348		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
349		    ep->defs[i].keysz, ep->start, ep->toksz))
350			return &ep->defs[i];
351
352	return NULL;
353}
354
355/*
356 * Parse a token from the input text.  The modes are:
357 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
358 *   before its next occurence.  Do not interpret the token in any
359 *   way and return EQN_TOK_QUOTED.  All other modes behave like
360 *   MODE_QUOTED when *ep->start is '"'.
361 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
362 *   otherwise, it ends before the next whitespace or brace.
363 *   Do not interpret the token and return EQN_TOK__MAX.
364 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
365 *   alias created with define.  If it is an alias, replace it with
366 *   its string value and reparse.
367 * MODE_TOK: Like MODE_SUB, but also check the token against the list
368 *   of tokens, and if there is a match, return that token.  Otherwise,
369 *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
370 *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
371 *   a token match, *ep->start is set to an allocated string that the
372 *   caller is expected to free.
373 * All modes skip whitespace following the end of the token.
374 */
375static enum eqn_tok
376eqn_next(struct eqn_node *ep, enum parse_mode mode)
377{
378	static int	 last_len, lim;
379
380	struct eqn_def	*def;
381	size_t		 start;
382	int		 diff, i, quoted;
383	enum eqn_tok	 tok;
384
385	/*
386	 * Reset the recursion counter after advancing
387	 * beyond the end of the previous substitution.
388	 */
389	if (ep->end - ep->data >= last_len)
390		lim = 0;
391
392	ep->start = ep->end;
393	quoted = mode == MODE_QUOTED;
394	for (;;) {
395		switch (*ep->start) {
396		case '\0':
397			ep->toksz = 0;
398			return EQN_TOK_EOF;
399		case '"':
400			quoted = 1;
401			break;
402		default:
403			break;
404		}
405		if (quoted) {
406			ep->end = strchr(ep->start + 1, *ep->start);
407			ep->start++;  /* Skip opening quote. */
408			if (ep->end == NULL) {
409				mandoc_msg(MANDOCERR_ARG_QUOTE,
410				    ep->node->line, ep->node->pos, NULL);
411				ep->end = strchr(ep->start, '\0');
412			}
413		} else {
414			ep->end = ep->start + 1;
415			if (*ep->start != '{' && *ep->start != '}')
416				ep->end += strcspn(ep->end, " ^~\"{}\t");
417		}
418		ep->toksz = ep->end - ep->start;
419		if (quoted && *ep->end != '\0')
420			ep->end++;  /* Skip closing quote. */
421		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
422			ep->end++;
423		if (quoted)  /* Cannot return, may have to strndup. */
424			break;
425		if (mode == MODE_NOSUB)
426			return EQN_TOK__MAX;
427		if ((def = eqn_def_find(ep)) == NULL)
428			break;
429		if (++lim > EQN_NEST_MAX) {
430			mandoc_msg(MANDOCERR_ROFFLOOP,
431			    ep->node->line, ep->node->pos, NULL);
432			return EQN_TOK_EOF;
433		}
434
435		/* Replace a defined name with its string value. */
436		if ((diff = def->valsz - ep->toksz) > 0) {
437			start = ep->start - ep->data;
438			ep->sz += diff;
439			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
440			ep->start = ep->data + start;
441		}
442		if (diff)
443			memmove(ep->start + def->valsz, ep->start + ep->toksz,
444			    strlen(ep->start + ep->toksz) + 1);
445		memcpy(ep->start, def->val, def->valsz);
446		last_len = ep->start - ep->data + def->valsz;
447	}
448	if (mode != MODE_TOK)
449		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
450	if (quoted) {
451		ep->start = mandoc_strndup(ep->start, ep->toksz);
452		return EQN_TOK_QUOTED;
453	}
454	for (tok = 0; tok < EQN_TOK__MAX; tok++)
455		if (STRNEQ(ep->start, ep->toksz,
456		    eqn_toks[tok], strlen(eqn_toks[tok])))
457			return tok;
458
459	for (i = 0; i < EQNSYM__MAX; i++) {
460		if (STRNEQ(ep->start, ep->toksz,
461		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
462			mandoc_asprintf(&ep->start,
463			    "\\[%s]", eqnsyms[i].sym);
464			return EQN_TOK_SYM;
465		}
466	}
467	ep->start = mandoc_strndup(ep->start, ep->toksz);
468	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
469		if (STRNEQ(ep->start, ep->toksz,
470		    eqn_func[i], strlen(eqn_func[i])))
471			return EQN_TOK_FUNC;
472	return EQN_TOK__MAX;
473}
474
475void
476eqn_box_free(struct eqn_box *bp)
477{
478	if (bp == NULL)
479		return;
480
481	if (bp->first)
482		eqn_box_free(bp->first);
483	if (bp->next)
484		eqn_box_free(bp->next);
485
486	free(bp->text);
487	free(bp->left);
488	free(bp->right);
489	free(bp->top);
490	free(bp->bottom);
491	free(bp);
492}
493
494struct eqn_box *
495eqn_box_new(void)
496{
497	struct eqn_box	*bp;
498
499	bp = mandoc_calloc(1, sizeof(*bp));
500	bp->expectargs = UINT_MAX;
501	return bp;
502}
503
504/*
505 * Allocate a box as the last child of the parent node.
506 */
507static struct eqn_box *
508eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
509{
510	struct eqn_box	*bp;
511
512	bp = eqn_box_new();
513	bp->parent = parent;
514	bp->parent->args++;
515	bp->font = bp->parent->font;
516	bp->size = ep->gsize;
517
518	if (NULL != parent->first) {
519		parent->last->next = bp;
520		bp->prev = parent->last;
521	} else
522		parent->first = bp;
523
524	parent->last = bp;
525	return bp;
526}
527
528/*
529 * Reparent the current last node (of the current parent) under a new
530 * EQN_SUBEXPR as the first element.
531 * Then return the new parent.
532 * The new EQN_SUBEXPR will have a two-child limit.
533 */
534static struct eqn_box *
535eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
536{
537	struct eqn_box	*b, *newb;
538
539	assert(NULL != parent->last);
540	b = parent->last;
541	if (parent->last == parent->first)
542		parent->first = NULL;
543	parent->args--;
544	parent->last = b->prev;
545	b->prev = NULL;
546	newb = eqn_box_alloc(ep, parent);
547	newb->type = EQN_SUBEXPR;
548	newb->expectargs = 2;
549	newb->args = 1;
550	newb->first = newb->last = b;
551	newb->first->next = NULL;
552	b->parent = newb;
553	return newb;
554}
555
556/*
557 * Parse the "delim" control statement.
558 */
559static void
560eqn_delim(struct eqn_node *ep)
561{
562	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
563		mandoc_msg(MANDOCERR_REQ_EMPTY,
564		    ep->node->line, ep->node->pos, "delim");
565		if (ep->end[0] != '\0')
566			ep->end++;
567	} else if (strncmp(ep->end, "off", 3) == 0) {
568		ep->delim = 0;
569		ep->end += 3;
570	} else if (strncmp(ep->end, "on", 2) == 0) {
571		if (ep->odelim && ep->cdelim)
572			ep->delim = 1;
573		ep->end += 2;
574	} else {
575		ep->odelim = *ep->end++;
576		ep->cdelim = *ep->end++;
577		ep->delim = 1;
578	}
579}
580
581/*
582 * Undefine a previously-defined string.
583 */
584static void
585eqn_undef(struct eqn_node *ep)
586{
587	struct eqn_def	*def;
588
589	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
590		mandoc_msg(MANDOCERR_REQ_EMPTY,
591		    ep->node->line, ep->node->pos, "undef");
592		return;
593	}
594	if ((def = eqn_def_find(ep)) == NULL)
595		return;
596	free(def->key);
597	free(def->val);
598	def->key = def->val = NULL;
599	def->keysz = def->valsz = 0;
600}
601
602static void
603eqn_def(struct eqn_node *ep)
604{
605	struct eqn_def	*def;
606	int		 i;
607
608	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
609		mandoc_msg(MANDOCERR_REQ_EMPTY,
610		    ep->node->line, ep->node->pos, "define");
611		return;
612	}
613
614	/*
615	 * Search for a key that already exists.
616	 * Create a new key if none is found.
617	 */
618	if ((def = eqn_def_find(ep)) == NULL) {
619		/* Find holes in string array. */
620		for (i = 0; i < (int)ep->defsz; i++)
621			if (0 == ep->defs[i].keysz)
622				break;
623
624		if (i == (int)ep->defsz) {
625			ep->defsz++;
626			ep->defs = mandoc_reallocarray(ep->defs,
627			    ep->defsz, sizeof(struct eqn_def));
628			ep->defs[i].key = ep->defs[i].val = NULL;
629		}
630
631		def = ep->defs + i;
632		free(def->key);
633		def->key = mandoc_strndup(ep->start, ep->toksz);
634		def->keysz = ep->toksz;
635	}
636
637	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
638		mandoc_msg(MANDOCERR_REQ_EMPTY,
639		    ep->node->line, ep->node->pos, "define %s", def->key);
640		free(def->key);
641		free(def->val);
642		def->key = def->val = NULL;
643		def->keysz = def->valsz = 0;
644		return;
645	}
646	free(def->val);
647	def->val = mandoc_strndup(ep->start, ep->toksz);
648	def->valsz = ep->toksz;
649}
650
651void
652eqn_parse(struct eqn_node *ep)
653{
654	struct eqn_box	*cur, *nbox, *parent, *split;
655	const char	*cp, *cpn;
656	char		*p;
657	enum eqn_tok	 tok;
658	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
659	int		 size;
660
661	parent = ep->node->eqn;
662	assert(parent != NULL);
663
664	/*
665	 * Empty equation.
666	 * Do not add it to the high-level syntax tree.
667	 */
668
669	if (ep->data == NULL)
670		return;
671
672	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
673
674next_tok:
675	tok = eqn_next(ep, MODE_TOK);
676	switch (tok) {
677	case EQN_TOK_UNDEF:
678		eqn_undef(ep);
679		break;
680	case EQN_TOK_NDEFINE:
681	case EQN_TOK_DEFINE:
682		eqn_def(ep);
683		break;
684	case EQN_TOK_TDEFINE:
685		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
686		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
687			mandoc_msg(MANDOCERR_REQ_EMPTY,
688			    ep->node->line, ep->node->pos, "tdefine");
689		break;
690	case EQN_TOK_DELIM:
691		eqn_delim(ep);
692		break;
693	case EQN_TOK_GFONT:
694		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
695			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
696			    ep->node->pos, "%s", eqn_toks[tok]);
697		break;
698	case EQN_TOK_MARK:
699	case EQN_TOK_LINEUP:
700		/* Ignore these. */
701		break;
702	case EQN_TOK_DYAD:
703	case EQN_TOK_VEC:
704	case EQN_TOK_UNDER:
705	case EQN_TOK_BAR:
706	case EQN_TOK_TILDE:
707	case EQN_TOK_HAT:
708	case EQN_TOK_DOT:
709	case EQN_TOK_DOTDOT:
710		if (parent->last == NULL) {
711			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
712			    ep->node->pos, "%s", eqn_toks[tok]);
713			cur = eqn_box_alloc(ep, parent);
714			cur->type = EQN_TEXT;
715			cur->text = mandoc_strdup("");
716		}
717		parent = eqn_box_makebinary(ep, parent);
718		parent->type = EQN_LIST;
719		parent->expectargs = 1;
720		parent->font = EQNFONT_ROMAN;
721		switch (tok) {
722		case EQN_TOK_DOTDOT:
723			parent->top = mandoc_strdup("\\[ad]");
724			break;
725		case EQN_TOK_VEC:
726			parent->top = mandoc_strdup("\\[->]");
727			break;
728		case EQN_TOK_DYAD:
729			parent->top = mandoc_strdup("\\[<>]");
730			break;
731		case EQN_TOK_TILDE:
732			parent->top = mandoc_strdup("\\[a~]");
733			break;
734		case EQN_TOK_UNDER:
735			parent->bottom = mandoc_strdup("\\[ul]");
736			break;
737		case EQN_TOK_BAR:
738			parent->top = mandoc_strdup("\\[rn]");
739			break;
740		case EQN_TOK_DOT:
741			parent->top = mandoc_strdup("\\[a.]");
742			break;
743		case EQN_TOK_HAT:
744			parent->top = mandoc_strdup("\\[ha]");
745			break;
746		default:
747			abort();
748		}
749		parent = parent->parent;
750		break;
751	case EQN_TOK_FWD:
752	case EQN_TOK_BACK:
753	case EQN_TOK_DOWN:
754	case EQN_TOK_UP:
755		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
756			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
757			    ep->node->pos, "%s", eqn_toks[tok]);
758		break;
759	case EQN_TOK_FAT:
760	case EQN_TOK_ROMAN:
761	case EQN_TOK_ITALIC:
762	case EQN_TOK_BOLD:
763		while (parent->args == parent->expectargs)
764			parent = parent->parent;
765		/*
766		 * These values apply to the next word or sequence of
767		 * words; thus, we mark that we'll have a child with
768		 * exactly one of those.
769		 */
770		parent = eqn_box_alloc(ep, parent);
771		parent->type = EQN_LIST;
772		parent->expectargs = 1;
773		switch (tok) {
774		case EQN_TOK_FAT:
775			parent->font = EQNFONT_FAT;
776			break;
777		case EQN_TOK_ROMAN:
778			parent->font = EQNFONT_ROMAN;
779			break;
780		case EQN_TOK_ITALIC:
781			parent->font = EQNFONT_ITALIC;
782			break;
783		case EQN_TOK_BOLD:
784			parent->font = EQNFONT_BOLD;
785			break;
786		default:
787			abort();
788		}
789		break;
790	case EQN_TOK_SIZE:
791	case EQN_TOK_GSIZE:
792		/* Accept two values: integral size and a single. */
793		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
794			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
795			    ep->node->pos, "%s", eqn_toks[tok]);
796			break;
797		}
798		size = mandoc_strntoi(ep->start, ep->toksz, 10);
799		if (-1 == size) {
800			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
801			    ep->node->pos, "%s", eqn_toks[tok]);
802			break;
803		}
804		if (EQN_TOK_GSIZE == tok) {
805			ep->gsize = size;
806			break;
807		}
808		while (parent->args == parent->expectargs)
809			parent = parent->parent;
810		parent = eqn_box_alloc(ep, parent);
811		parent->type = EQN_LIST;
812		parent->expectargs = 1;
813		parent->size = size;
814		break;
815	case EQN_TOK_FROM:
816	case EQN_TOK_TO:
817	case EQN_TOK_SUB:
818	case EQN_TOK_SUP:
819		/*
820		 * We have a left-right-associative expression.
821		 * Repivot under a positional node, open a child scope
822		 * and keep on reading.
823		 */
824		if (parent->last == NULL) {
825			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
826			    ep->node->pos, "%s", eqn_toks[tok]);
827			cur = eqn_box_alloc(ep, parent);
828			cur->type = EQN_TEXT;
829			cur->text = mandoc_strdup("");
830		}
831		while (parent->expectargs == 1 && parent->args == 1)
832			parent = parent->parent;
833		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
834			for (cur = parent; cur != NULL; cur = cur->parent)
835				if (cur->pos == EQNPOS_SUB ||
836				    cur->pos == EQNPOS_SUP ||
837				    cur->pos == EQNPOS_SUBSUP ||
838				    cur->pos == EQNPOS_SQRT ||
839				    cur->pos == EQNPOS_OVER)
840					break;
841			if (cur != NULL)
842				parent = cur->parent;
843		}
844		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
845			parent->expectargs = 3;
846			parent->pos = EQNPOS_SUBSUP;
847			break;
848		}
849		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
850			parent->expectargs = 3;
851			parent->pos = EQNPOS_FROMTO;
852			break;
853		}
854		parent = eqn_box_makebinary(ep, parent);
855		switch (tok) {
856		case EQN_TOK_FROM:
857			parent->pos = EQNPOS_FROM;
858			break;
859		case EQN_TOK_TO:
860			parent->pos = EQNPOS_TO;
861			break;
862		case EQN_TOK_SUP:
863			parent->pos = EQNPOS_SUP;
864			break;
865		case EQN_TOK_SUB:
866			parent->pos = EQNPOS_SUB;
867			break;
868		default:
869			abort();
870		}
871		break;
872	case EQN_TOK_SQRT:
873		while (parent->args == parent->expectargs)
874			parent = parent->parent;
875		/*
876		 * Accept a left-right-associative set of arguments just
877		 * like sub and sup and friends but without rebalancing
878		 * under a pivot.
879		 */
880		parent = eqn_box_alloc(ep, parent);
881		parent->type = EQN_SUBEXPR;
882		parent->pos = EQNPOS_SQRT;
883		parent->expectargs = 1;
884		break;
885	case EQN_TOK_OVER:
886		/*
887		 * We have a right-left-associative fraction.
888		 * Close out anything that's currently open, then
889		 * rebalance and continue reading.
890		 */
891		if (parent->last == NULL) {
892			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
893			    ep->node->pos, "%s", eqn_toks[tok]);
894			cur = eqn_box_alloc(ep, parent);
895			cur->type = EQN_TEXT;
896			cur->text = mandoc_strdup("");
897		}
898		while (parent->args == parent->expectargs)
899			parent = parent->parent;
900		while (EQN_SUBEXPR == parent->type)
901			parent = parent->parent;
902		parent = eqn_box_makebinary(ep, parent);
903		parent->pos = EQNPOS_OVER;
904		break;
905	case EQN_TOK_RIGHT:
906	case EQN_TOK_BRACE_CLOSE:
907		/*
908		 * Close out the existing brace.
909		 * FIXME: this is a shitty sentinel: we should really
910		 * have a native EQN_BRACE type or whatnot.
911		 */
912		for (cur = parent; cur != NULL; cur = cur->parent)
913			if (cur->type == EQN_LIST &&
914			    cur->expectargs > 1 &&
915			    (tok == EQN_TOK_BRACE_CLOSE ||
916			     cur->left != NULL))
917				break;
918		if (cur == NULL) {
919			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
920			    ep->node->pos, "%s", eqn_toks[tok]);
921			break;
922		}
923		parent = cur;
924		if (EQN_TOK_RIGHT == tok) {
925			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
926				mandoc_msg(MANDOCERR_REQ_EMPTY,
927				    ep->node->line, ep->node->pos,
928				    "%s", eqn_toks[tok]);
929				break;
930			}
931			/* Handling depends on right/left. */
932			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
933				parent->right = mandoc_strdup("\\[rc]");
934			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
935				parent->right = mandoc_strdup("\\[rf]");
936			else
937				parent->right =
938				    mandoc_strndup(ep->start, ep->toksz);
939		}
940		parent = parent->parent;
941		if (tok == EQN_TOK_BRACE_CLOSE &&
942		    (parent->type == EQN_PILE ||
943		     parent->type == EQN_MATRIX))
944			parent = parent->parent;
945		/* Close out any "singleton" lists. */
946		while (parent->type == EQN_LIST &&
947		    parent->expectargs == 1 &&
948		    parent->args == 1)
949			parent = parent->parent;
950		break;
951	case EQN_TOK_BRACE_OPEN:
952	case EQN_TOK_LEFT:
953		/*
954		 * If we already have something in the stack and we're
955		 * in an expression, then rewind til we're not any more
956		 * (just like with the text node).
957		 */
958		while (parent->args == parent->expectargs)
959			parent = parent->parent;
960		if (EQN_TOK_LEFT == tok &&
961		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
962			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
963			    ep->node->pos, "%s", eqn_toks[tok]);
964			break;
965		}
966		parent = eqn_box_alloc(ep, parent);
967		parent->type = EQN_LIST;
968		if (EQN_TOK_LEFT == tok) {
969			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
970				parent->left = mandoc_strdup("\\[lc]");
971			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
972				parent->left = mandoc_strdup("\\[lf]");
973			else
974				parent->left =
975				    mandoc_strndup(ep->start, ep->toksz);
976		}
977		break;
978	case EQN_TOK_PILE:
979	case EQN_TOK_LPILE:
980	case EQN_TOK_RPILE:
981	case EQN_TOK_CPILE:
982	case EQN_TOK_CCOL:
983	case EQN_TOK_LCOL:
984	case EQN_TOK_RCOL:
985		while (parent->args == parent->expectargs)
986			parent = parent->parent;
987		parent = eqn_box_alloc(ep, parent);
988		parent->type = EQN_PILE;
989		parent->expectargs = 1;
990		break;
991	case EQN_TOK_ABOVE:
992		for (cur = parent; cur != NULL; cur = cur->parent)
993			if (cur->type == EQN_PILE)
994				break;
995		if (cur == NULL) {
996			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
997			    ep->node->pos, "%s", eqn_toks[tok]);
998			break;
999		}
1000		parent = eqn_box_alloc(ep, cur);
1001		parent->type = EQN_LIST;
1002		break;
1003	case EQN_TOK_MATRIX:
1004		while (parent->args == parent->expectargs)
1005			parent = parent->parent;
1006		parent = eqn_box_alloc(ep, parent);
1007		parent->type = EQN_MATRIX;
1008		parent->expectargs = 1;
1009		break;
1010	case EQN_TOK_EOF:
1011		return;
1012	case EQN_TOK__MAX:
1013	case EQN_TOK_FUNC:
1014	case EQN_TOK_QUOTED:
1015	case EQN_TOK_SYM:
1016		p = ep->start;
1017		assert(p != NULL);
1018		/*
1019		 * If we already have something in the stack and we're
1020		 * in an expression, then rewind til we're not any more.
1021		 */
1022		while (parent->args == parent->expectargs)
1023			parent = parent->parent;
1024		cur = eqn_box_alloc(ep, parent);
1025		cur->type = EQN_TEXT;
1026		cur->text = p;
1027		switch (tok) {
1028		case EQN_TOK_FUNC:
1029			cur->font = EQNFONT_ROMAN;
1030			break;
1031		case EQN_TOK_QUOTED:
1032			if (cur->font == EQNFONT_NONE)
1033				cur->font = EQNFONT_ITALIC;
1034			break;
1035		case EQN_TOK_SYM:
1036			break;
1037		default:
1038			if (cur->font != EQNFONT_NONE || *p == '\0')
1039				break;
1040			cpn = p - 1;
1041			ccln = CCL_LET;
1042			split = NULL;
1043			for (;;) {
1044				/* Advance to next character. */
1045				cp = cpn++;
1046				ccl = ccln;
1047				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1048				    isdigit((unsigned char)*cpn) ||
1049				    (*cpn == '.' && (ccl == CCL_DIG ||
1050				     isdigit((unsigned char)cpn[1]))) ?
1051				    CCL_DIG : CCL_PUN;
1052				/* No boundary before first character. */
1053				if (cp < p)
1054					continue;
1055				cur->font = ccl == CCL_LET ?
1056				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1057				if (*cp == '\\')
1058					mandoc_escape(&cpn, NULL, NULL);
1059				/* No boundary after last character. */
1060				if (*cpn == '\0')
1061					break;
1062				if (ccln == ccl && *cp != ',' && *cpn != ',')
1063					continue;
1064				/* Boundary found, split the text. */
1065				if (parent->args == parent->expectargs) {
1066					/* Remove the text from the tree. */
1067					if (cur->prev == NULL)
1068						parent->first = cur->next;
1069					else
1070						cur->prev->next = NULL;
1071					parent->last = cur->prev;
1072					parent->args--;
1073					/* Set up a list instead. */
1074					split = eqn_box_alloc(ep, parent);
1075					split->type = EQN_LIST;
1076					/* Insert the word into the list. */
1077					split->first = split->last = cur;
1078					cur->parent = split;
1079					cur->prev = NULL;
1080					parent = split;
1081				}
1082				/* Append a new text box. */
1083				nbox = eqn_box_alloc(ep, parent);
1084				nbox->type = EQN_TEXT;
1085				nbox->text = mandoc_strdup(cpn);
1086				/* Truncate the old box. */
1087				p = mandoc_strndup(cur->text,
1088				    cpn - cur->text);
1089				free(cur->text);
1090				cur->text = p;
1091				/* Setup to process the new box. */
1092				cur = nbox;
1093				p = nbox->text;
1094				cpn = p - 1;
1095				ccln = CCL_LET;
1096			}
1097			if (split != NULL)
1098				parent = split->parent;
1099			break;
1100		}
1101		break;
1102	default:
1103		abort();
1104	}
1105	goto next_tok;
1106}
1107
1108void
1109eqn_free(struct eqn_node *p)
1110{
1111	int		 i;
1112
1113	if (p == NULL)
1114		return;
1115
1116	for (i = 0; i < (int)p->defsz; i++) {
1117		free(p->defs[i].key);
1118		free(p->defs[i].val);
1119	}
1120
1121	free(p->data);
1122	free(p->defs);
1123	free(p);
1124}
1125