1/*	$NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $	*/
2
3/* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
4
5/*
6 * Copyright (c) 1991, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)spell.h	8.1 (Berkeley) 6/6/93
34 */
35/*
36 * Copyright (C) Caldera International Inc.  2001-2002.
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code and documentation must retain the above
43 *    copyright notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 *    notice, this list of conditions and the following disclaimer in the
46 *    documentation and/or other materials provided with the distribution.
47 * 3. All advertising materials mentioning features or use of this software
48 *    must display the following acknowledgement:
49 *	This product includes software developed or owned by Caldera
50 *	International, Inc.
51 * 4. Neither the name of Caldera International, Inc. nor the names of other
52 *    contributors may be used to endorse or promote products derived from
53 *    this software without specific prior written permission.
54 *
55 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
56 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
58 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
60 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
65 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69#include <sys/cdefs.h>
70
/*
 * Identification strings (copyright notice and upstream revision id).
 * Both are omitted when `lint' is defined.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
#else
#endif
/* Revision of the OpenBSD file this one was derived from. */
static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
84
85#include <sys/param.h>
86#include <sys/mman.h>
87#include <sys/stat.h>
88
89#include <ctype.h>
90#include <err.h>
91#include <errno.h>
92#include <fcntl.h>
93#include <limits.h>
94#include <locale.h>
95#include <stdio.h>
96#include <stdlib.h>
97#include <string.h>
98#include <unistd.h>
99#include <util.h>
100
101#include "extern.h"
102
/*
 * Number of derivation levels consumed by each pass through suffix()
 * (it does `lev += DLEV').  s() and es() compare their level against it
 * to refuse plural stripping once other suffixes have been removed.
 */
#define DLEV 2

/* Dictionary lookup, prefix/suffix stripping and small helpers. */
static int	 dict(char *, char *);
static int	 trypref(char *, const char *, size_t);
static int	 tryword(char *, char *, size_t);
static int	 suffix(char *, size_t);
static int	 vowel(int);
static const char *lookuppref(char **, char *);
static char	*skipv(char *);
static void	 ise(void);
static void	 print_word(FILE *);
static void	 ztos(char *);
static int	 monosyl(char *, char *);
static void 	 usage(void) __dead;
static void	 getderiv(size_t);

/*
 * Suffix handlers referenced from suftab.  All share one signature:
 * (end of candidate stem, "d" derivation string, "a" derivation string,
 * derivation level) and return non-zero when the word was found.
 */
static int	 an(char *, const char *, const char *, size_t);
static int	 bility(char *, const char *, const char *, size_t);
static int	 es(char *, const char *, const char *, size_t);
static int	 i_to_y(char *, const char *, const char *, size_t);
static int	 ily(char *, const char *, const char *, size_t);
static int	 ize(char *, const char *, const char *, size_t);
static int	 metry(char *, const char *, const char *, size_t);
static int	 ncy(char *, const char *, const char *, size_t);
static int	 nop(char *, const char *, const char *, size_t);
static int	 s(char *, const char *, const char *, size_t);
static int	 strip(char *, const char *, const char *, size_t);
static int	 tion(char *, const char *, const char *, size_t);
static int	 y_to_e(char *, const char *, const char *, size_t);
static int	 CCe(char *, const char *, const char *, size_t);
static int	 VCe(char *, const char *, const char *, size_t);
134
135/*
136 * This cannot be const because we modify it when we choose british
137 * spelling.
138 */
139static struct suftab {
140	const char *suf;
141	int (*p1)(char *, const char *, const char *, size_t);
142	int n1;
143	const char *d1;
144	const char *a1;
145	int (*p2)(char *, const char *, const char *, size_t);
146	int n2;
147	const char *d2;
148	const char *a2;
149} suftab[] = {
150	{ .suf = "ssen",	.p1 = ily,	.n1 = 4,
151	  .d1 = "-y+iness", 	.a1 = "+ness" },
152	{ .suf = "ssel",	.p1 = ily,	.n1 = 4,
153	  .d1 = "-y+i+less", 	.a1 = "+less" },
154	{ .suf = "se",		.p1 = s,	.n1 = 1,
155	  .d1 = "", 		.a1 = "+s",	.p2 = es,
156	  .n2 = 2,		.d2 = "-y+ies",	.a2 = "+es" },
157	{ .suf = "s'",		.p1 = s,	.n1 = 2,
158	  .d1 = "", 		.a1 = "+'s" },
159	{ .suf = "s",		.p1 = s,	.n1 = 1,
160	  .d1 = "", 		.a1 = "+s" },
161	{ .suf = "ecn",		.p1 = ncy,	.n1 = 1,
162	  .d1 = "", 		.a1 = "-t+ce" },
163	{ .suf = "ycn",		.p1 = ncy,	.n1 = 1,
164	  .d1 = "", 		.a1 = "-cy+t" },
165	{ .suf = "ytilb",	.p1 = nop,	.n1 = 0,
166	  .d1 = "", 		.a1 = "" },
167	{ .suf = "ytilib",	.p1 = bility,	.n1 = 5,
168	  .d1 = "-le+ility", 	.a1 = "" },
169	{ .suf = "elbaif",	.p1 = i_to_y,	.n1 = 4,
170	  .d1 = "-y+iable", 	.a1 = "" },
171	{ .suf = "elba",	.p1 = CCe,	.n1 = 4,
172	  .d1 = "-e+able", 	.a1 = "+able" },
173	{ .suf = "yti",		.p1 = CCe,	.n1 = 3,
174	  .d1 = "-e+ity", 	.a1 = "+ity" },
175	{ .suf = "ylb",		.p1 = y_to_e,	.n1 = 1,
176	  .d1 = "-e+y", 	.a1 = "" },
177	{ .suf = "yl",		.p1 = ily,	.n1 = 2,
178	  .d1 = "-y+ily", 	.a1 = "+ly" },
179	{ .suf = "laci",	.p1 = strip,	.n1 = 2,
180	  .d1 = "", 		.a1 = "+al" },
181	{ .suf = "latnem",	.p1 = strip,	.n1 = 2,
182	  .d1 = "", 		.a1 = "+al" },
183	{ .suf = "lanoi",	.p1 = strip,	.n1 = 2,
184	  .d1 = "", 		.a1 = "+al" },
185	{ .suf = "tnem",	.p1 = strip,	.n1 = 4,
186	  .d1 = "", 		.a1 = "+ment" },
187	{ .suf = "gni",		.p1 = CCe,	.n1 = 3,
188	  .d1 = "-e+ing", 	.a1 = "+ing" },
189	{ .suf = "reta",	.p1 = nop,	.n1 = 0,
190	  .d1 = "", 		.a1 = "" },
191	{ .suf = "re",		.p1 = strip,	.n1 = 1,
192	  .d1 = "", 		.a1 = "+r",	.p2 = i_to_y,
193	  .n2 = 2,		.d2 = "-y+ier",	.a2 = "+er" },
194	{ .suf = "de",		.p1 = strip,	.n1 = 1,
195	  .d1 = "", 		.a1 = "+d",	.p2 = i_to_y,
196	  .n2 = 2,		.d2 = "-y+ied",	.a2 = "+ed" },
197	{ .suf = "citsi",	.p1 = strip,	.n1 = 2,
198	  .d1 = "", 		.a1 = "+ic" },
199	{ .suf = "cihparg",	.p1 = i_to_y,	.n1 = 1,
200	  .d1 = "-y+ic", 	.a1 = "" },
201	{ .suf = "tse",		.p1 = strip,	.n1 = 2,
202	  .d1 = "", 		.a1 = "+st",	.p2 = i_to_y,
203	  .n2 = 3,		.d2 = "-y+iest",.a2 = "+est" },
204	{ .suf = "cirtem",	.p1 = i_to_y,	.n1 = 1,
205	  .d1 = "-y+ic", 	.a1 = "" },
206	{ .suf = "yrtem",	.p1 = metry,	.n1 = 0,
207	  .d1 = "-ry+er", 	.a1 = "" },
208	{ .suf = "cigol",	.p1 = i_to_y,	.n1 = 1,
209	  .d1 = "-y+ic", 	.a1 = "" },
210	{ .suf = "tsigol",	.p1 = i_to_y,	.n1 = 2,
211	  .d1 = "-y+ist", 	.a1 = "" },
212	{ .suf = "tsi",		.p1 = VCe,	.n1 = 3,
213	  .d1 = "-e+ist", 	.a1 = "+ist" },
214	{ .suf = "msi",		.p1 = VCe,	.n1 = 3,
215	  .d1 = "-e+ism", 	.a1 = "+ist" },
216	{ .suf = "noitacif",	.p1 = i_to_y,	.n1 = 6,
217	  .d1 = "-y+ication", 	.a1 = "" },
218	{ .suf = "noitazi",	.p1 = ize,	.n1 = 5,
219	  .d1 = "-e+ation", 	.a1 = "" },
220	{ .suf = "rota",	.p1 = tion,	.n1 = 2,
221	  .d1 = "-e+or", 	.a1 = "" },
222	{ .suf = "noit",	.p1 = tion,	.n1 = 3,
223	  .d1 = "-e+ion", 	.a1 = "+ion" },
224	{ .suf = "naino",	.p1 = an,	.n1 = 3,
225	  .d1 = "", 		.a1 = "+ian" },
226	{ .suf = "na",		.p1 = an,	.n1 = 1,
227	  .d1 = "", 		.a1 = "+n" },
228	{ .suf = "evit",	.p1 = tion,	.n1 = 3,
229	  .d1 = "-e+ive", 	.a1 = "+ive" },
230	{ .suf = "ezi",		.p1 = CCe,	.n1 = 3,
231	  .d1 = "-e+ize", 	.a1 = "+ize" },
232	{ .suf = "pihs",	.p1 = strip,	.n1 = 4,
233	  .d1 = "", 		.a1 = "+ship" },
234	{ .suf = "dooh",	.p1 = ily,	.n1 = 4,
235	  .d1 = "-y+hood", 	.a1 = "+hood" },
236	{ .suf = "ekil",	.p1 = strip,	.n1 = 4,
237	  .d1 = "", 		.a1 = "+like" },
238	{ .suf = NULL, }
239};
240
/*
 * Prefixes that lookuppref() may strip from the front of a word.
 * The list is scanned front to back and the first match wins, so a
 * longer prefix must come before any shorter one it starts with.
 * A NULL pointer terminates the list.
 */
static const char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
278
/*
 * One entry per word list given on the command line.  main() fills the
 * array and marks its end with an entry whose fd is -1; dict() walks it
 * until it sees that marker.
 */
static struct wlist {
	int fd;			/* descriptor from open(), -1 in the end marker */
	unsigned char *front;	/* start of the mmap()ed list */
	unsigned char *back;	/* front + file size */
} *wlists;

static int vflag;		/* -v: also report derivations */
static int xflag;		/* -x: dict() prints every stem it looks up */
static char word[LINE_MAX];	/* word being analysed; edited in place */
static char original[LINE_MAX];	/* the word exactly as it was read */
static char affix[LINE_MAX];	/* derivation text built by tryword() */
/*
 * Derivation strings indexed by level; tryword() concatenates them into
 * affix.  getderiv() grows buf and records its size in maxlev.
 */
static struct {
	const char **buf;
	size_t maxlev;
} deriv;
294
295/*
296 * The spellprog utility accepts a newline-delimited list of words
297 * on stdin.  For arguments it expects the path to a word list and
298 * the path to a file in which to store found words.
299 *
300 * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
302 * remaining words are then passed to spell again, this time with
303 * the dictionary file as the first (non-flag) argument.
304 *
305 * Unlike historic versions of spellprog, this one does not use
306 * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
308 *
309 * Note that spellprog should be called via the spell shell script
310 * and is not meant to be invoked directly by the user.
311 */
312
313int
314main(int argc, char **argv)
315{
316	char *ep, *cp, *dp;
317	char *outfile;
318	int ch, fold, i;
319	struct stat sb;
320	FILE *file, *found;
321
322	setlocale(LC_ALL, "");
323
324	outfile = NULL;
325	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
326		switch (ch) {
327		case 'b':
328			/* Use British dictionary and convert ize -> ise. */
329			ise();
330			break;
331		case 'o':
332			outfile = optarg;
333			break;
334		case 'v':
335			/* Also write derivations to "found" file. */
336			vflag++;
337			break;
338		case 'x':
339			/* Print plausible stems to stdout. */
340			xflag++;
341			break;
342		default:
343			usage();
344		}
345
346	}
347	argc -= optind;
348	argv += optind;
349	if (argc < 1)
350		usage();
351
352	/* Open and mmap the word/stop lists. */
353	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
354		err(1, "malloc");
355
356	for (i = 0; argc--; i++) {
357		wlists[i].fd = open(argv[i], O_RDONLY, 0);
358		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
359			err(1, "%s", argv[i]);
360		if (sb.st_size > SIZE_T_MAX)
361			errx(1, "%s: %s", argv[i], strerror(EFBIG));
362		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
363		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
364		if (wlists[i].front == MAP_FAILED)
365			err(1, "%s", argv[i]);
366		wlists[i].back = wlists[i].front + (size_t)sb.st_size;
367	}
368	wlists[i].fd = -1;
369
370	/* Open file where found words are to be saved. */
371	if (outfile == NULL)
372		found = NULL;
373	else if ((found = fopen(outfile, "w")) == NULL)
374		err(1, "cannot open %s", outfile);
375
376	for (;; print_word(file)) {
377		affix[0] = '\0';
378		file = found;
379		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
380			if (ep - word == sizeof(word) - 1) {
381				*ep = '\0';
382				warnx("word too long (%s)", word);
383				while ((ch = getchar()) != '\n')
384					;	/* slurp until EOL */
385			}
386			if (ch == EOF) {
387				if (found != NULL)
388					fclose(found);
389				exit(0);
390			}
391		}
392		for (cp = word, dp = original; cp < ep; )
393			*dp++ = *cp++;
394		*dp = '\0';
395		fold = 0;
396		for (cp = word; cp < ep; cp++)
397			if (islower((unsigned char)*cp))
398				goto lcase;
399		if (trypref(ep, ".", 0))
400			continue;
401		++fold;
402		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
403			*dp = tolower((unsigned char)*cp);
404lcase:
405		if (trypref(ep, ".", 0) || suffix(ep, 0))
406			continue;
407		if (isupper((unsigned char)word[0])) {
408			for (cp = original, dp = word; (*dp = *cp++); dp++) {
409				if (fold)
410					*dp = tolower((unsigned char)*dp);
411			}
412			word[0] = tolower((unsigned char)word[0]);
413			goto lcase;
414		}
415		file = stdout;
416	}
417}
418
419static void
420print_word(FILE *f)
421{
422
423	if (f != NULL) {
424		if (vflag && affix[0] != '\0' && affix[0] != '.')
425			fprintf(f, "%s\t%s\n", affix, original);
426		else
427			fprintf(f, "%s\n", original);
428	}
429}
430
431/*
432 * For each matching suffix in suftab, call the function associated
433 * with that suffix (p1 and p2).
434 */
435static int
436suffix(char *ep, size_t lev)
437{
438	const struct suftab *t;
439	char *cp;
440	const char *sp;
441
442	lev += DLEV;
443	getderiv(lev + 1);
444	deriv.buf[lev] = deriv.buf[lev - 1] = 0;
445	for (t = suftab; (sp = t->suf) != NULL; t++) {
446		cp = ep;
447		while (*sp) {
448			if (*--cp != *sp++)
449				goto next;
450		}
451		for (sp = cp; --sp >= word && !vowel(*sp);)
452			;	/* nothing */
453		if (sp < word)
454			return 0;
455		if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
456			return 1;
457		if (t->p2 != NULL) {
458			deriv.buf[lev] = deriv.buf[lev + 1] = NULL;
459			return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
460		}
461		return 0;
462next:		;
463	}
464	return 0;
465}
466
/*
 * Suffix handler that never matches; all arguments are ignored.
 */
static int
/*ARGSUSED*/
nop(char *ep, const char *d, const char *a, size_t lev)
{
	const int not_found = 0;

	return not_found;
}
474
/*
 * Plain suffix removal: look the stem (word .. ep) up with derivation
 * string a, and if that fails try to strip a further suffix from it.
 * Returns 1 on success, 0 otherwise; d is unused.
 */
static int
/*ARGSUSED*/
strip(char *ep, const char *d, const char *a, size_t lev)
{

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
482
483static int
484s(char *ep, const char *d, const char *a, const size_t lev)
485{
486
487	if (lev > DLEV + 1)
488		return 0;
489	if (*ep == 's' && ep[-1] == 's')
490		return 0;
491	return strip(ep, d, a, lev);
492}
493
494static int
495/*ARGSUSED*/
496an(char *ep, const char *d, const char *a, size_t lev)
497{
498
499	if (!isupper((unsigned char)*word))	/* must be proper name */
500		return 0;
501	return trypref(ep, a, lev);
502}
503
/*
 * Handler for "-ization": put an 'e' at ep and strip from the position
 * after it, reporting d as the derivation.  The character overwritten
 * at ep is not restored; a is unused.
 */
static int
/*ARGSUSED*/
ize(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'e';
	return strip(ep + 1, "", d, lev);
}
512
/*
 * Try the stem with an 'e' appended at ep, reporting d as the
 * derivation.  The overwritten character is put back when the attempt
 * fails.  Returns 1 on success, 0 otherwise; a is unused.
 */
static int
/*ARGSUSED*/
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		ep[0] = saved;
		return 0;
	}
	return 1;
}
525
526static int
527ily(char *ep, const char *d, const char *a, size_t lev)
528{
529
530	if (ep[-1] == 'i')
531		return i_to_y(ep, d, a, lev);
532	else
533		return strip(ep, d, a, lev);
534}
535
536static int
537ncy(char *ep, const char *d, const char *a, size_t lev)
538{
539
540	if (skipv(skipv(ep - 1)) < word)
541		return 0;
542	ep[-1] = 't';
543	return strip(ep, d, a, lev);
544}
545
/*
 * Handler for "-bility": write an 'l' at ep and let y_to_e() try an
 * 'e' after it.
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
553
/*
 * If the stem ends in 'i', turn it into 'y' and report d as the
 * derivation; otherwise leave the stem alone and report a.  Either way
 * the result is handed to strip().  The 'i' is not restored on failure.
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *note = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		note = d;
	}
	return strip(ep, "", note, lev);
}
564
565static int
566es(char *ep, const char *d, const char *a, size_t lev)
567{
568
569	if (lev > DLEV)
570		return 0;
571
572	switch (ep[-1]) {
573	default:
574		return 0;
575	case 'i':
576		return i_to_y(ep, d, a, lev);
577	case 's':
578	case 'h':
579	case 'z':
580	case 'x':
581		return strip(ep, d, a, lev);
582	}
583}
584
/*
 * Handler for "-metry": overwrite the two characters before ep with
 * "er" and strip.
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return strip(ep, d, a, lev);
}
593
/*
 * Handler for "-tion", "-tive" and "-ator".  The character two
 * positions before ep decides: 'c' or 'r' looks the stem up directly,
 * 'a' tries the stem with a final 'e', anything else fails.
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char key = ep[-2];

	if (key == 'c' || key == 'r')
		return trypref(ep, a, lev);
	if (key == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
607
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last one or two letters of the stem, either try the
 * stem with a final 'e' (y_to_e) or fall back to VCe().
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	int scg, scgvz;

	if (last == 'l') {
		/* consonant other than l, r, w in front of the 'l' */
		if (!vowel(ep[-2]) &&
		    ep[-2] != 'l' && ep[-2] != 'r' && ep[-2] != 'w')
			return y_to_e(ep, d, a, lev);
		return VCe(ep, d, a, lev);
	}

	/* "ss" is left to VCe(). */
	if (last == 's' && ep[-2] == 's')
		return VCe(ep, d, a, lev);

	scg = (last == 's' || last == 'c' || last == 'g');
	scgvz = (scg || last == 'v' || last == 'z');

	/* s, c, g directly followed by an 'a' is rejected outright. */
	if (scg && *ep == 'a')
		return 0;
	if (scgvz && vowel(ep[-2]))
		return VCe(ep, d, a, lev);
	if (scgvz || last == 'u') {
		if (y_to_e(ep, d, a, lev))
			return 1;
		/* Re-read the stem: only an "ng" ending gets a second try. */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return 0;
	}
	return VCe(ep, d, a, lev);
}
650
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem already ending in 'e' is rejected.  If it ends in a vowel
 * followed by a consonant, first try it with an 'e' appended (the
 * overwritten character is restored on failure); finally fall back to
 * strip().
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{

	if (ep[-1] == 'e')
		return 0;
	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		const char saved = ep[0];

		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		ep[0] = saved;
	}
	return strip(ep, d, a, lev);
}
672
673static const char *
674lookuppref(char **wp, char *ep)
675{
676	const char **sp, *cp;
677	char *bp;
678
679	for (sp = preftab; *sp; sp++) {
680		bp = *wp;
681		for (cp = *sp; *cp; cp++, bp++) {
682			if (tolower((unsigned char)*bp) != *cp)
683				goto next;
684		}
685		for (cp = bp; cp < ep; cp++) {
686			if (vowel(*cp)) {
687				*wp = bp;
688				return *sp;
689			}
690		}
691next:		;
692	}
693	return 0;
694}
695
696/*
697 * If the word is not in the dictionary, try stripping off prefixes
698 * until the word is found or we run out of prefixes to check.
699 */
700static int
701trypref(char *ep, const char *a, size_t lev)
702{
703	const char *cp;
704	char *bp;
705	char *pp;
706	int val = 0;
707	char space[20];
708
709	getderiv(lev + 2);
710	deriv.buf[lev] = a;
711	if (tryword(word, ep, lev))
712		return 1;
713	bp = word;
714	pp = space;
715	deriv.buf[lev + 1] = pp;
716	while ((cp = lookuppref(&bp, ep)) != NULL) {
717		*pp++ = '+';
718		while ((*pp = *cp++))
719			pp++;
720		if (tryword(bp, ep, lev + 1)) {
721			val = 1;
722			break;
723		}
724		if (pp - space >= sizeof(space))
725			return 0;
726	}
727	deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL;
728	return val;
729}
730
731static int
732tryword(char *bp, char *ep, size_t lev)
733{
734	size_t i, j;
735	char duple[3];
736
737	if (ep-bp <= 1)
738		return 0;
739	if (vowel(*ep) && monosyl(bp, ep))
740		return 0;
741
742	i = dict(bp, ep);
743	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
744	    monosyl(bp, ep - 1)) {
745		ep--;
746		getderiv(++lev);
747		deriv.buf[lev] = duple;
748		duple[0] = '+';
749		duple[1] = *ep;
750		duple[2] = '\0';
751		i = dict(bp, ep);
752	}
753	if (vflag == 0 || i == 0)
754		return i;
755
756	/* Also tack on possible derivations. (XXX - warn on truncation?) */
757	for (j = lev; j > 0; j--) {
758		if (deriv.buf[j])
759			(void)strlcat(affix, deriv.buf[j], sizeof(affix));
760	}
761	return i;
762}
763
/*
 * Return 1 if bp .. ep consists of consonants only, followed by a
 * single vowel and one final consonant other than 'x' or 'w';
 * 0 otherwise (including when it is shorter than two characters).
 */
static int
monosyl(char *bp, char *ep)
{
	const char *p;
	char last;

	if (ep - bp < 2)
		return 0;

	last = ep[-1];
	if (vowel(last) || !vowel(ep[-2]) || last == 'x' || last == 'w')
		return 0;

	/* Anything in front of that vowel must be a consonant. */
	for (p = bp; p < ep - 2; p++) {
		if (vowel(*p))
			return 0;
	}
	return 1;
}
777
778static char *
779skipv(char *st)
780{
781
782	if (st >= word && vowel(*st))
783		st--;
784	while (st >= word && !vowel(*st))
785		st--;
786	return st;
787}
788
/*
 * Return 1 if c is one of the letters a, e, i, o, u or y in either
 * case, 0 otherwise.
 *
 * Callers pass plain char values, which may be negative.  tolower() is
 * only defined for EOF and values representable as unsigned char, so
 * the argument is converted first.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
804
805/*
806 * Crummy way to Britishise.
807 */
808static void
809ise(void)
810{
811	struct suftab *tab;
812	char *cp;
813
814	for (tab = suftab; tab->suf; tab++) {
815		/* Assume that suffix will contain 'z' if a1 or d1 do */
816		if (strchr(tab->suf, 'z')) {
817			tab->suf = cp = estrdup(tab->suf);
818			ztos(cp);
819			if (strchr(tab->d1, 'z')) {
820				tab->d1 = cp = estrdup(tab->d1);
821				ztos(cp);
822			}
823			if (strchr(tab->a1, 'z')) {
824				tab->a1 = cp = estrdup(tab->a1);
825				ztos(cp);
826			}
827		}
828	}
829}
830
/*
 * Replace every lower-case 'z' in the string st by 's', in place.
 */
static void
ztos(char *st)
{
	char *hit;

	/* Hop from one 'z' to the next instead of testing each character. */
	for (hit = strchr(st, 'z'); hit != NULL; hit = strchr(hit + 1, 'z'))
		*hit = 's';
}
839
840/*
841 * Look up a word in the dictionary.
842 * Returns 1 if found, 0 if not.
843 */
844static int
845dict(char *bp, char *ep)
846{
847	char c;
848	int i, rval;
849
850	c = *ep;
851	*ep = '\0';
852	if (xflag)
853		printf("=%s\n", bp);
854	for (i = rval = 0; wlists[i].fd != -1; i++) {
855		if ((rval = look((unsigned char *)bp, wlists[i].front,
856		    wlists[i].back)) == 1)
857			break;
858	}
859	*ep = c;
860	return rval;
861}
862
863static void
864getderiv(size_t lev)
865{
866	if (deriv.maxlev < lev) {
867		if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0)
868			err(1, "Cannot grow array");
869		deriv.maxlev = lev;
870	}
871}
872
873
/*
 * Print the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n", prog);
	exit(1);
}
882