1341825Sdim/*	$NetBSD: spellprog.c,v 1.7 2007/12/15 16:32:07 perry Exp $	*/
2218887Sdim
3218887Sdim/* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
4218887Sdim
5218887Sdim/*
6218887Sdim * Copyright (c) 1991, 1993
7218887Sdim *	The Regents of the University of California.  All rights reserved.
8218887Sdim *
9218887Sdim * Redistribution and use in source and binary forms, with or without
10218887Sdim * modification, are permitted provided that the following conditions
11218887Sdim * are met:
12218887Sdim * 1. Redistributions of source code must retain the above copyright
13218887Sdim *    notice, this list of conditions and the following disclaimer.
14218887Sdim * 2. Redistributions in binary form must reproduce the above copyright
15218887Sdim *    notice, this list of conditions and the following disclaimer in the
16341825Sdim *    documentation and/or other materials provided with the distribution.
17341825Sdim * 3. Neither the name of the University nor the names of its contributors
18234353Sdim *    may be used to endorse or promote products derived from this software
19218887Sdim *    without specific prior written permission.
20261991Sdim *
21218887Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22341825Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23276479Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24218887Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25341825Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26249423Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27314564Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28249423Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29341825Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30341825Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31218887Sdim * SUCH DAMAGE.
32341825Sdim *
33341825Sdim *	@(#)spell.h	8.1 (Berkeley) 6/6/93
34249423Sdim */
35218887Sdim/*
36341825Sdim * Copyright (C) Caldera International Inc.  2001-2002.
37341825Sdim * All rights reserved.
38341825Sdim *
39249423Sdim * Redistribution and use in source and binary forms, with or without
40341825Sdim * modification, are permitted provided that the following conditions
41341825Sdim * are met:
42341825Sdim * 1. Redistributions of source code and documentation must retain the above
43341825Sdim *    copyright notice, this list of conditions and the following disclaimer.
44341825Sdim * 2. Redistributions in binary form must reproduce the above copyright
45218887Sdim *    notice, this list of conditions and the following disclaimer in the
46341825Sdim *    documentation and/or other materials provided with the distribution.
47341825Sdim * 3. All advertising materials mentioning features or use of this software
48341825Sdim *    must display the following acknowledgement:
49341825Sdim *	This product includes software developed or owned by Caldera
50249423Sdim *	International, Inc.
51341825Sdim * 4. Neither the name of Caldera International, Inc. nor the names of other
52234353Sdim *    contributors may be used to endorse or promote products derived from
53341825Sdim *    this software without specific prior written permission.
54249423Sdim *
55341825Sdim * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
56341825Sdim * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
57341825Sdim * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
58341825Sdim * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59341825Sdim * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
60341825Sdim * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61249423Sdim * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62341825Sdim * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63341825Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64341825Sdim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
65341825Sdim * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66276479Sdim * POSSIBILITY OF SUCH DAMAGE.
67218887Sdim */
68341825Sdim
69341825Sdim#include <sys/cdefs.h>
70341825Sdim
71341825Sdim#ifndef lint
72218887Sdimstatic const char copyright[] =
73218887Sdim"@(#) Copyright (c) 1991, 1993\n\
74218887Sdim	The Regents of the University of California.  All rights reserved.\n";
75218887Sdim#endif /* not lint */
76276479Sdim
77276479Sdim#ifndef lint
78249423Sdim#if 0
79249423Sdimstatic const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
80249423Sdim#else
81249423Sdim#endif
82249423Sdimstatic const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
83249423Sdim#endif /* not lint */
84341825Sdim
85218887Sdim#include <sys/param.h>
86234353Sdim#include <sys/mman.h>
87234353Sdim#include <sys/stat.h>
88218887Sdim
89218887Sdim#include <ctype.h>
90218887Sdim#include <err.h>
91218887Sdim#include <errno.h>
92226633Sdim#include <fcntl.h>
93251662Sdim#include <limits.h>
94251662Sdim#include <locale.h>
95218887Sdim#include <stdio.h>
96218887Sdim#include <stdlib.h>
97276479Sdim#include <string.h>
98218887Sdim#include <unistd.h>
99218887Sdim#include <util.h>
100218887Sdim
101226633Sdim#include "extern.h"
102251662Sdim
103218887Sdim#define DLEV 2
104218887Sdim
105218887Sdimstatic int	 dict(char *, char *);
106218887Sdimstatic int	 trypref(char *, const char *, size_t);
107218887Sdimstatic int	 tryword(char *, char *, size_t);
108218887Sdimstatic int	 suffix(char *, size_t);
109234353Sdimstatic int	 vowel(int);
110234353Sdimstatic const char *lookuppref(char **, char *);
111234353Sdimstatic char	*skipv(char *);
112243830Sdimstatic void	 ise(void);
113243830Sdimstatic void	 print_word(FILE *);
114243830Sdimstatic void	 ztos(char *);
115243830Sdimstatic int	 monosyl(char *, char *);
116314564Sdimstatic void 	 usage(void) __dead;
117314564Sdimstatic void	 getderiv(size_t);
118314564Sdim
119243830Sdimstatic int	 an(char *, const char *, const char *, size_t);
120243830Sdimstatic int	 bility(char *, const char *, const char *, size_t);
121296417Sdimstatic int	 es(char *, const char *, const char *, size_t);
122243830Sdimstatic int	 i_to_y(char *, const char *, const char *, size_t);
123276479Sdimstatic int	 ily(char *, const char *, const char *, size_t);
124276479Sdimstatic int	 ize(char *, const char *, const char *, size_t);
125243830Sdimstatic int	 metry(char *, const char *, const char *, size_t);
126314564Sdimstatic int	 ncy(char *, const char *, const char *, size_t);
127296417Sdimstatic int	 nop(char *, const char *, const char *, size_t);
128243830Sdimstatic int	 s(char *, const char *, const char *, size_t);
129314564Sdimstatic int	 strip(char *, const char *, const char *, size_t);
130276479Sdimstatic int	 tion(char *, const char *, const char *, size_t);
131276479Sdimstatic int	 y_to_e(char *, const char *, const char *, size_t);
132243830Sdimstatic int	 CCe(char *, const char *, const char *, size_t);
133243830Sdimstatic int	 VCe(char *, const char *, const char *, size_t);
134243830Sdim
135243830Sdim/*
136243830Sdim * This cannot be const because we modify it when we choose british
137243830Sdim * spelling.
138243830Sdim */
139243830Sdimstatic struct suftab {
140243830Sdim	const char *suf;
141243830Sdim	int (*p1)(char *, const char *, const char *, size_t);
142243830Sdim	int n1;
143243830Sdim	const char *d1;
144243830Sdim	const char *a1;
145243830Sdim	int (*p2)(char *, const char *, const char *, size_t);
146243830Sdim	int n2;
147243830Sdim	const char *d2;
148314564Sdim	const char *a2;
149243830Sdim} suftab[] = {
150296417Sdim	{ .suf = "ssen",	.p1 = ily,	.n1 = 4,
151243830Sdim	  .d1 = "-y+iness", 	.a1 = "+ness" },
152314564Sdim	{ .suf = "ssel",	.p1 = ily,	.n1 = 4,
153314564Sdim	  .d1 = "-y+i+less", 	.a1 = "+less" },
154243830Sdim	{ .suf = "se",		.p1 = s,	.n1 = 1,
155314564Sdim	  .d1 = "", 		.a1 = "+s",	.p2 = es,
156314564Sdim	  .n2 = 2,		.d2 = "-y+ies",	.a2 = "+es" },
157243830Sdim	{ .suf = "s'",		.p1 = s,	.n1 = 2,
158314564Sdim	  .d1 = "", 		.a1 = "+'s" },
159243830Sdim	{ .suf = "s",		.p1 = s,	.n1 = 1,
160314564Sdim	  .d1 = "", 		.a1 = "+s" },
161243830Sdim	{ .suf = "ecn",		.p1 = ncy,	.n1 = 1,
162243830Sdim	  .d1 = "", 		.a1 = "-t+ce" },
163296417Sdim	{ .suf = "ycn",		.p1 = ncy,	.n1 = 1,
164341825Sdim	  .d1 = "", 		.a1 = "-cy+t" },
165276479Sdim	{ .suf = "ytilb",	.p1 = nop,	.n1 = 0,
166341825Sdim	  .d1 = "", 		.a1 = "" },
167243830Sdim	{ .suf = "ytilib",	.p1 = bility,	.n1 = 5,
168243830Sdim	  .d1 = "-le+ility", 	.a1 = "" },
169243830Sdim	{ .suf = "elbaif",	.p1 = i_to_y,	.n1 = 4,
170314564Sdim	  .d1 = "-y+iable", 	.a1 = "" },
171314564Sdim	{ .suf = "elba",	.p1 = CCe,	.n1 = 4,
172314564Sdim	  .d1 = "-e+able", 	.a1 = "+able" },
173243830Sdim	{ .suf = "yti",		.p1 = CCe,	.n1 = 3,
174243830Sdim	  .d1 = "-e+ity", 	.a1 = "+ity" },
175243830Sdim	{ .suf = "ylb",		.p1 = y_to_e,	.n1 = 1,
176243830Sdim	  .d1 = "-e+y", 	.a1 = "" },
177243830Sdim	{ .suf = "yl",		.p1 = ily,	.n1 = 2,
178243830Sdim	  .d1 = "-y+ily", 	.a1 = "+ly" },
179314564Sdim	{ .suf = "laci",	.p1 = strip,	.n1 = 2,
180314564Sdim	  .d1 = "", 		.a1 = "+al" },
181243830Sdim	{ .suf = "latnem",	.p1 = strip,	.n1 = 2,
182314564Sdim	  .d1 = "", 		.a1 = "+al" },
183243830Sdim	{ .suf = "lanoi",	.p1 = strip,	.n1 = 2,
184243830Sdim	  .d1 = "", 		.a1 = "+al" },
185243830Sdim	{ .suf = "tnem",	.p1 = strip,	.n1 = 4,
186251662Sdim	  .d1 = "", 		.a1 = "+ment" },
187251662Sdim	{ .suf = "gni",		.p1 = CCe,	.n1 = 3,
188341825Sdim	  .d1 = "-e+ing", 	.a1 = "+ing" },
189341825Sdim	{ .suf = "reta",	.p1 = nop,	.n1 = 0,
190251662Sdim	  .d1 = "", 		.a1 = "" },
191234353Sdim	{ .suf = "re",		.p1 = strip,	.n1 = 1,
192234353Sdim	  .d1 = "", 		.a1 = "+r",	.p2 = i_to_y,
193249423Sdim	  .n2 = 2,		.d2 = "-y+ier",	.a2 = "+er" },
194251662Sdim	{ .suf = "de",		.p1 = strip,	.n1 = 1,
195341825Sdim	  .d1 = "", 		.a1 = "+d",	.p2 = i_to_y,
196341825Sdim	  .n2 = 2,		.d2 = "-y+ied",	.a2 = "+ed" },
197341825Sdim	{ .suf = "citsi",	.p1 = strip,	.n1 = 2,
198234353Sdim	  .d1 = "", 		.a1 = "+ic" },
199296417Sdim	{ .suf = "cihparg",	.p1 = i_to_y,	.n1 = 1,
200234353Sdim	  .d1 = "-y+ic", 	.a1 = "" },
201234353Sdim	{ .suf = "tse",		.p1 = strip,	.n1 = 2,
202234353Sdim	  .d1 = "", 		.a1 = "+st",	.p2 = i_to_y,
203314564Sdim	  .n2 = 3,		.d2 = "-y+iest",.a2 = "+est" },
204234353Sdim	{ .suf = "cirtem",	.p1 = i_to_y,	.n1 = 1,
205296417Sdim	  .d1 = "-y+ic", 	.a1 = "" },
206234353Sdim	{ .suf = "yrtem",	.p1 = metry,	.n1 = 0,
207234353Sdim	  .d1 = "-ry+er", 	.a1 = "" },
208314564Sdim	{ .suf = "cigol",	.p1 = i_to_y,	.n1 = 1,
209243830Sdim	  .d1 = "-y+ic", 	.a1 = "" },
210314564Sdim	{ .suf = "tsigol",	.p1 = i_to_y,	.n1 = 2,
211341825Sdim	  .d1 = "-y+ist", 	.a1 = "" },
212341825Sdim	{ .suf = "tsi",		.p1 = VCe,	.n1 = 3,
213234353Sdim	  .d1 = "-e+ist", 	.a1 = "+ist" },
214296417Sdim	{ .suf = "msi",		.p1 = VCe,	.n1 = 3,
215234353Sdim	  .d1 = "-e+ism", 	.a1 = "+ist" },
216234353Sdim	{ .suf = "noitacif",	.p1 = i_to_y,	.n1 = 6,
217234353Sdim	  .d1 = "-y+ication", 	.a1 = "" },
218234353Sdim	{ .suf = "noitazi",	.p1 = ize,	.n1 = 5,
219314564Sdim	  .d1 = "-e+ation", 	.a1 = "" },
220341825Sdim	{ .suf = "rota",	.p1 = tion,	.n1 = 2,
221234353Sdim	  .d1 = "-e+or", 	.a1 = "" },
222234353Sdim	{ .suf = "noit",	.p1 = tion,	.n1 = 3,
223234353Sdim	  .d1 = "-e+ion", 	.a1 = "+ion" },
224234353Sdim	{ .suf = "naino",	.p1 = an,	.n1 = 3,
225234353Sdim	  .d1 = "", 		.a1 = "+ian" },
226314564Sdim	{ .suf = "na",		.p1 = an,	.n1 = 1,
227296417Sdim	  .d1 = "", 		.a1 = "+n" },
228234353Sdim	{ .suf = "evit",	.p1 = tion,	.n1 = 3,
229234353Sdim	  .d1 = "-e+ive", 	.a1 = "+ive" },
230314564Sdim	{ .suf = "ezi",		.p1 = CCe,	.n1 = 3,
231234353Sdim	  .d1 = "-e+ize", 	.a1 = "+ize" },
232234353Sdim	{ .suf = "pihs",	.p1 = strip,	.n1 = 4,
233234353Sdim	  .d1 = "", 		.a1 = "+ship" },
234234353Sdim	{ .suf = "dooh",	.p1 = ily,	.n1 = 4,
235314564Sdim	  .d1 = "-y+hood", 	.a1 = "+hood" },
236314564Sdim	{ .suf = "ekil",	.p1 = strip,	.n1 = 4,
237314564Sdim	  .d1 = "", 		.a1 = "+like" },
238234353Sdim	{ .suf = NULL, }
239296417Sdim};
240314564Sdim
/*
 * Prefixes that lookuppref() will strip from the front of a word.
 * The list is scanned in order and ends with a NULL sentinel.
 */
static const char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
278249423Sdim
/*
 * One word list named on the command line.  main() fills an array of
 * these and terminates it with an entry whose fd is -1; dict() walks the
 * array until it reaches that entry.
 */
static struct wlist {
	int fd;			/* descriptor returned by open() */
	unsigned char *front;	/* start of the mmap()ed file */
	unsigned char *back;	/* front + file size */
} *wlists;

static int vflag;		/* -v: add derivations to the found-words output */
static int xflag;		/* -x: dict() prints every candidate stem */
static char word[LINE_MAX];	/* working copy of the input word; edited in place */
static char original[LINE_MAX];	/* the input word as read; this is what gets printed */
static char affix[LINE_MAX];	/* derivation text assembled by tryword() */
/* Growable array of derivation strings, indexed by level (see getderiv()). */
static struct {
	const char **buf;
	size_t maxlev;
} deriv;
294314564Sdim
295261991Sdim/*
296261991Sdim * The spellprog utility accepts a newline-delimited list of words
297314564Sdim * on stdin.  For arguments it expects the path to a word list and
298261991Sdim * the path to a file in which to store found words.
299261991Sdim *
300261991Sdim * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
302261991Sdim * remaining words are then passed to spell again, this time with
303261991Sdim * the dictionary file as the first (non-flag) argument.
304276479Sdim *
305261991Sdim * Unlike historic versions of spellprog, this one does not use
306314564Sdim * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
308261991Sdim *
309261991Sdim * Note that spellprog should be called via the spell shell script
310261991Sdim * and is not meant to be invoked directly by the user.
311261991Sdim */
312261991Sdim
313261991Sdimint
314261991Sdimmain(int argc, char **argv)
315261991Sdim{
316261991Sdim	char *ep, *cp, *dp;
317261991Sdim	char *outfile;
318261991Sdim	int ch, fold, i;
319261991Sdim	struct stat sb;
320261991Sdim	FILE *file, *found;
321261991Sdim
322261991Sdim	setlocale(LC_ALL, "");
323261991Sdim
324261991Sdim	outfile = NULL;
325314564Sdim	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
326261991Sdim		switch (ch) {
327261991Sdim		case 'b':
328314564Sdim			/* Use British dictionary and convert ize -> ise. */
329261991Sdim			ise();
330261991Sdim			break;
331261991Sdim		case 'o':
332261991Sdim			outfile = optarg;
333261991Sdim			break;
334261991Sdim		case 'v':
335261991Sdim			/* Also write derivations to "found" file. */
336261991Sdim			vflag++;
337261991Sdim			break;
338261991Sdim		case 'x':
339261991Sdim			/* Print plausible stems to stdout. */
340234353Sdim			xflag++;
341218887Sdim			break;
342218887Sdim		default:
343218887Sdim			usage();
344218887Sdim		}
345218887Sdim
346218887Sdim	}
347218887Sdim	argc -= optind;
348226633Sdim	argv += optind;
349341825Sdim	if (argc < 1)
350218887Sdim		usage();
351234353Sdim
352296417Sdim	/* Open and mmap the word/stop lists. */
353218887Sdim	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
354249423Sdim		err(1, "malloc");
355226633Sdim
356341825Sdim	for (i = 0; argc--; i++) {
357341825Sdim		wlists[i].fd = open(argv[i], O_RDONLY, 0);
358218887Sdim		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
359226633Sdim			err(1, "%s", argv[i]);
360218887Sdim		if (sb.st_size > SIZE_T_MAX)
361226633Sdim			errx(1, "%s: %s", argv[i], strerror(EFBIG));
362226633Sdim		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
363218887Sdim		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
364226633Sdim		if (wlists[i].front == MAP_FAILED)
365226633Sdim			err(1, "%s", argv[i]);
366218887Sdim		wlists[i].back = wlists[i].front + (size_t)sb.st_size;
367296417Sdim	}
368234353Sdim	wlists[i].fd = -1;
369218887Sdim
370218887Sdim	/* Open file where found words are to be saved. */
371218887Sdim	if (outfile == NULL)
372218887Sdim		found = NULL;
373218887Sdim	else if ((found = fopen(outfile, "w")) == NULL)
374218887Sdim		err(1, "cannot open %s", outfile);
375218887Sdim
376226633Sdim	for (;; print_word(file)) {
377341825Sdim		affix[0] = '\0';
378218887Sdim		file = found;
379218887Sdim		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
380218887Sdim			if (ep - word == sizeof(word) - 1) {
381218887Sdim				*ep = '\0';
382218887Sdim				warnx("word too long (%s)", word);
383218887Sdim				while ((ch = getchar()) != '\n')
384218887Sdim					;	/* slurp until EOL */
385341825Sdim			}
386341825Sdim			if (ch == EOF) {
387218887Sdim				if (found != NULL)
388226633Sdim					fclose(found);
389251662Sdim				exit(0);
390234353Sdim			}
391218887Sdim		}
392226633Sdim		for (cp = word, dp = original; cp < ep; )
393226633Sdim			*dp++ = *cp++;
394218887Sdim		*dp = '\0';
395218887Sdim		fold = 0;
396218887Sdim		for (cp = word; cp < ep; cp++)
397226633Sdim			if (islower((unsigned char)*cp))
398226633Sdim				goto lcase;
399218887Sdim		if (trypref(ep, ".", 0))
400218887Sdim			continue;
401218887Sdim		++fold;
402218887Sdim		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
403218887Sdim			*dp = tolower((unsigned char)*cp);
404218887Sdimlcase:
405218887Sdim		if (trypref(ep, ".", 0) || suffix(ep, 0))
406218887Sdim			continue;
407226633Sdim		if (isupper((unsigned char)word[0])) {
408218887Sdim			for (cp = original, dp = word; (*dp = *cp++); dp++) {
409218887Sdim				if (fold)
410218887Sdim					*dp = tolower((unsigned char)*dp);
411218887Sdim			}
412218887Sdim			word[0] = tolower((unsigned char)word[0]);
413218887Sdim			goto lcase;
414218887Sdim		}
415218887Sdim		file = stdout;
416218887Sdim	}
417218887Sdim}
418218887Sdim
419218887Sdimstatic void
420218887Sdimprint_word(FILE *f)
421218887Sdim{
422218887Sdim
423218887Sdim	if (f != NULL) {
424218887Sdim		if (vflag && affix[0] != '\0' && affix[0] != '.')
425218887Sdim			fprintf(f, "%s\t%s\n", affix, original);
426261991Sdim		else
427218887Sdim			fprintf(f, "%s\n", original);
428261991Sdim	}
429218887Sdim}
430218887Sdim
431261991Sdim/*
432276479Sdim * For each matching suffix in suftab, call the function associated
433218887Sdim * with that suffix (p1 and p2).
434261991Sdim */
435261991Sdimstatic int
436261991Sdimsuffix(char *ep, size_t lev)
437261991Sdim{
438261991Sdim	const struct suftab *t;
439261991Sdim	char *cp;
440261991Sdim	const char *sp;
441261991Sdim
442261991Sdim	lev += DLEV;
443261991Sdim	getderiv(lev + 1);
444218887Sdim	deriv.buf[lev] = deriv.buf[lev - 1] = 0;
445276479Sdim	for (t = suftab; (sp = t->suf) != NULL; t++) {
446218887Sdim		cp = ep;
447218887Sdim		while (*sp) {
448261991Sdim			if (*--cp != *sp++)
449261991Sdim				goto next;
450261991Sdim		}
451261991Sdim		for (sp = cp; --sp >= word && !vowel(*sp);)
452341825Sdim			;	/* nothing */
453218887Sdim		if (sp < word)
454261991Sdim			return 0;
455218887Sdim		if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
456218887Sdim			return 1;
457341825Sdim		if (t->p2 != NULL) {
458218887Sdim			deriv.buf[lev] = deriv.buf[lev + 1] = '\0';
459261991Sdim			return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
460218887Sdim		}
461218887Sdim		return 0;
462218887Sdimnext:		;
463218887Sdim	}
464226633Sdim	return 0;
465218887Sdim}
466218887Sdim
/*
 * Suffix handler that rejects unconditionally; every argument is ignored.
 */
/*ARGSUSED*/
static int
nop(char *ep, const char *d, const char *a, size_t lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;
	return 0;
}
474218887Sdim
/*
 * Accept the stem ending at ep if it is a word (optionally after prefix
 * stripping) or if a further suffix can be removed from it.  The string
 * a is recorded as the derivation; d is unused.  Returns 1 or 0.
 */
/*ARGSUSED*/
static int
strip(char *ep, const char *d, const char *a, size_t lev)
{
	(void)d;

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
482261991Sdim
483261991Sdimstatic int
484261991Sdims(char *ep, const char *d, const char *a, const size_t lev)
485218887Sdim{
486226633Sdim
487218887Sdim	if (lev > DLEV + 1)
488218887Sdim		return 0;
489226633Sdim	if (*ep == 's' && ep[-1] == 's')
490218887Sdim		return 0;
491218887Sdim	return strip(ep, d, a, lev);
492218887Sdim}
493226633Sdim
494218887Sdimstatic int
495218887Sdim/*ARGSUSED*/
496218887Sdiman(char *ep, const char *d, const char *a, size_t lev)
497226633Sdim{
498218887Sdim
499218887Sdim	if (!isupper((unsigned char)*word))	/* must be proper name */
500218887Sdim		return 0;
501226633Sdim	return trypref(ep, a, lev);
502218887Sdim}
503218887Sdim
/*
 * Handler for "ization": overwrite the character at ep with 'e' and try
 * the stem that ends just after it, recording d as the derivation.
 * The overwritten character is not restored.  a is unused.
 */
/*ARGSUSED*/
static int
ize(char *ep, const char *d, const char *a, size_t lev)
{
	(void)a;

	ep[0] = 'e';
	return strip(ep + 1, "", d, lev);
}
512226633Sdim
/*
 * Try the stem with an 'e' appended: the character at ep is replaced by
 * 'e' and the stem ending after it is passed to strip() with d as the
 * derivation.  On failure the replaced character is put back.
 * a is unused.  Returns 1 on success, 0 on failure.
 */
/*ARGSUSED*/
static int
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = ep[0];

	(void)a;

	ep[0] = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		ep[0] = saved;
		return 0;
	}
	return 1;
}
525341825Sdim
526341825Sdimstatic int
527341825Sdimily(char *ep, const char *d, const char *a, size_t lev)
528218887Sdim{
529314564Sdim
530234353Sdim	if (ep[-1] == 'i')
531234353Sdim		return i_to_y(ep, d, a, lev);
532234353Sdim	else
533341825Sdim		return strip(ep, d, a, lev);
534234353Sdim}
535341825Sdim
536341825Sdimstatic int
537341825Sdimncy(char *ep, const char *d, const char *a, size_t lev)
538234353Sdim{
539218887Sdim
540234353Sdim	if (skipv(skipv(ep - 1)) < word)
541234353Sdim		return 0;
542234353Sdim	ep[-1] = 't';
543234353Sdim	return strip(ep, d, a, lev);
544234353Sdim}
545234353Sdim
/*
 * Handler for "ibility": store 'l' at ep, then let y_to_e() put an 'e'
 * after it and try the resulting stem.
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{
	ep[0] = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
553341825Sdim
/*
 * If the stem ends in 'i', change that 'i' to 'y' and record d as the
 * derivation; otherwise leave the stem alone and record a.  The stem is
 * then tried with strip().
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return strip(ep, "", how, lev);
}
564234353Sdim
565341825Sdimstatic int
566341825Sdimes(char *ep, const char *d, const char *a, size_t lev)
567234353Sdim{
568341825Sdim
569341825Sdim	if (lev > DLEV)
570341825Sdim		return 0;
571341825Sdim
572341825Sdim	switch (ep[-1]) {
573341825Sdim	default:
574341825Sdim		return 0;
575341825Sdim	case 'i':
576341825Sdim		return i_to_y(ep, d, a, lev);
577341825Sdim	case 's':
578218887Sdim	case 'h':
579341825Sdim	case 'z':
580341825Sdim	case 'x':
581341825Sdim		return strip(ep, d, a, lev);
582341825Sdim	}
583239462Sdim}
584341825Sdim
/*
 * Handler for "metry": overwrite the two characters before ep with "er"
 * and try the resulting stem with strip().
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return strip(ep, d, a, lev);
}
593341825Sdim
/*
 * Handler for "tion", "tive" and "ator".  The character two places
 * before ep decides: 'c' or 'r' -> look the stem up with trypref();
 * 'a' -> try the stem with an 'e' via y_to_e(); anything else -> reject.
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char before = ep[-2];

	if (before == 'c' || before == 'r')
		return trypref(ep, a, lev);
	if (before == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
607341825Sdim
/*
 * Possible consonant-consonant-e ending.
 *
 * Looks at the last one or two characters of the stem (ep[-1], ep[-2])
 * to decide whether to try the stem with an 'e' appended (y_to_e), to
 * reject it outright, or to fall back on VCe().  Every `break' below
 * leads to the VCe() call at the bottom.
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{

	switch (ep[-1]) {
	case 'l':
		/* vowel + 'l', or "ll" / "rl" / "wl": leave it to VCe() */
		if (vowel(ep[-2]))
			break;
		switch (ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			break;
		default:
			/* other consonant + 'l': try stem + 'e' only */
			return y_to_e(ep, d, a, lev);
		}
		break;
	case 's':
		/* "ss": leave it to VCe() */
		if (ep[-2] == 's')
			break;
		/*FALLTHROUGH*/
	case 'c':
	case 'g':
		/* reject when the character at ep (just past the stem) is 'a' */
		if (*ep == 'a')
			return 0;
		/*FALLTHROUGH*/
	case 'v':
	case 'z':
		/* vowel before the final consonant: leave it to VCe() */
		if (vowel(ep[-2]))
			break;
		/*FALLTHROUGH*/
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return 1;
		/* only a stem ending in "ng" gets a second chance via VCe() */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return 0;
	}
	return VCe(ep, d, a, lev);
}
650341825Sdim
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  When the stem ends in a consonant
 * preceded by a vowel, the stem is first tried with an 'e' appended
 * (d is the derivation); if that fails the overwritten character is put
 * back.  Finally the plain stem is tried with strip().
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	char saved;

	if (last == 'e')
		return 0;

	if (!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		ep[0] = saved;
	}
	return strip(ep, d, a, lev);
}
672251662Sdim
673251662Sdimstatic const char *
674341825Sdimlookuppref(char **wp, char *ep)
675341825Sdim{
676341825Sdim	const char **sp, *cp;
677341825Sdim	char *bp;
678341825Sdim
679341825Sdim	for (sp = preftab; *sp; sp++) {
680341825Sdim		bp = *wp;
681341825Sdim		for (cp = *sp; *cp; cp++, bp++) {
682341825Sdim			if (tolower((unsigned char)*bp) != *cp)
683341825Sdim				goto next;
684218887Sdim		}
685341825Sdim		for (cp = bp; cp < ep; cp++) {
686341825Sdim			if (vowel(*cp)) {
687341825Sdim				*wp = bp;
688341825Sdim				return *sp;
689218887Sdim			}
690341825Sdim		}
691341825Sdimnext:		;
692341825Sdim	}
693341825Sdim	return 0;
694341825Sdim}
695218887Sdim
696218887Sdim/*
697341825Sdim * If the word is not in the dictionary, try stripping off prefixes
698341825Sdim * until the word is found or we run out of prefixes to check.
699341825Sdim */
700341825Sdimstatic int
701218887Sdimtrypref(char *ep, const char *a, size_t lev)
702218887Sdim{
703341825Sdim	const char *cp;
704341825Sdim	char *bp;
705341825Sdim	char *pp;
706341825Sdim	int val = 0;
707341825Sdim	char space[20];
708341825Sdim
709341825Sdim	getderiv(lev + 2);
710341825Sdim	deriv.buf[lev] = a;
711218887Sdim	if (tryword(word, ep, lev))
712218887Sdim		return 1;
713341825Sdim	bp = word;
714341825Sdim	pp = space;
715341825Sdim	deriv.buf[lev + 1] = pp;
716341825Sdim	while ((cp = lookuppref(&bp, ep)) != NULL) {
717341825Sdim		*pp++ = '+';
718341825Sdim		while ((*pp = *cp++))
719296417Sdim			pp++;
720341825Sdim		if (tryword(bp, ep, lev + 1)) {
721341825Sdim			val = 1;
722341825Sdim			break;
723341825Sdim		}
724218887Sdim		if (pp - space >= sizeof(space))
725341825Sdim			return 0;
726218887Sdim	}
727341825Sdim	deriv.buf[lev + 1] = deriv.buf[lev + 2] = '\0';
728341825Sdim	return val;
729218887Sdim}
730341825Sdim
731341825Sdimstatic int
732341825Sdimtryword(char *bp, char *ep, size_t lev)
733218887Sdim{
734218887Sdim	size_t i, j;
735341825Sdim	char duple[3];
736341825Sdim
737341825Sdim	if (ep-bp <= 1)
738341825Sdim		return 0;
739218887Sdim	if (vowel(*ep) && monosyl(bp, ep))
740341825Sdim		return 0;
741341825Sdim
742341825Sdim	i = dict(bp, ep);
743341825Sdim	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
744218887Sdim	    monosyl(bp, ep - 1)) {
745218887Sdim		ep--;
746341825Sdim		getderiv(++lev);
747341825Sdim		deriv.buf[lev] = duple;
748341825Sdim		duple[0] = '+';
749341825Sdim		duple[1] = *ep;
750218887Sdim		duple[2] = '\0';
751341825Sdim		i = dict(bp, ep);
752341825Sdim	}
753296417Sdim	if (vflag == 0 || i == 0)
754341825Sdim		return i;
755341825Sdim
756218887Sdim	/* Also tack on possible derivations. (XXX - warn on truncation?) */
757341825Sdim	for (j = lev; j > 0; j--) {
758341825Sdim		if (deriv.buf[j])
759341825Sdim			(void)strlcat(affix, deriv.buf[j], sizeof(affix));
760341825Sdim	}
761341825Sdim	return i;
762218887Sdim}
763341825Sdim
/*
 * Return 1 when the text between bp and ep is at least two characters
 * long, ends in a single non-vowel other than 'x' or 'w', has a vowel
 * directly before that, and has no vowel anywhere earlier.  Return 0
 * otherwise.
 */
static int
monosyl(char *bp, char *ep)
{
	const char *p;
	char last;

	if (ep < bp + 2)
		return 0;

	last = ep[-1];
	if (vowel(last))
		return 0;
	if (!vowel(ep[-2]))
		return 0;
	if (last == 'x' || last == 'w')
		return 0;

	/* Everything in front of that vowel must be vowel-free. */
	for (p = ep - 2; p > bp; p--) {
		if (vowel(p[-1]))
			return 0;
	}
	return 1;
}
777218887Sdim
778341825Sdimstatic char *
779341825Sdimskipv(char *st)
780341825Sdim{
781341825Sdim
782218887Sdim	if (st >= word && vowel(*st))
783341825Sdim		st--;
784341825Sdim	while (st >= word && !vowel(*st))
785341825Sdim		st--;
786341825Sdim	return st;
787341825Sdim}
788341825Sdim
/*
 * Return 1 if c is one of the letters a, e, i, o, u or y in either
 * case, 0 otherwise.
 *
 * Callers pass plain `char' values, which may be negative; the value is
 * converted to unsigned char before it reaches tolower(), whose argument
 * must be representable as an unsigned char or be EOF.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
804341825Sdim
805341825Sdim/*
806341825Sdim * Crummy way to Britishise.
807341825Sdim */
808218887Sdimstatic void
809341825Sdimise(void)
810341825Sdim{
811341825Sdim	struct suftab *tab;
812218887Sdim	char *cp;
813341825Sdim
814218887Sdim	for (tab = suftab; tab->suf; tab++) {
815218887Sdim		/* Assume that suffix will contain 'z' if a1 or d1 do */
816218887Sdim		if (strchr(tab->suf, 'z')) {
817218887Sdim			tab->suf = cp = estrdup(tab->suf);
818239462Sdim			ztos(cp);
819239462Sdim			if (strchr(tab->d1, 'z')) {
820239462Sdim				tab->d1 = cp = estrdup(tab->d1);
821239462Sdim				ztos(cp);
822239462Sdim			}
823239462Sdim			if (strchr(tab->a1, 'z')) {
824239462Sdim				tab->a1 = cp = estrdup(tab->a1);
825239462Sdim				ztos(cp);
826239462Sdim			}
827239462Sdim		}
828239462Sdim	}
829341825Sdim}
830239462Sdim
/*
 * Replace every 'z' in the NUL-terminated string st with 's', in place.
 */
static void
ztos(char *st)
{
	char *p = st;

	while (*p != '\0') {
		if (*p == 'z')
			*p = 's';
		p++;
	}
}
839296417Sdim
840239462Sdim/*
841239462Sdim * Look up a word in the dictionary.
842239462Sdim * Returns 1 if found, 0 if not.
843239462Sdim */
844239462Sdimstatic int
845239462Sdimdict(char *bp, char *ep)
846239462Sdim{
847288943Sdim	char c;
848341825Sdim	int i, rval;
849239462Sdim
850239462Sdim	c = *ep;
851239462Sdim	*ep = '\0';
852239462Sdim	if (xflag)
853239462Sdim		printf("=%s\n", bp);
854276479Sdim	for (i = rval = 0; wlists[i].fd != -1; i++) {
855239462Sdim		if ((rval = look((unsigned char *)bp, wlists[i].front,
856239462Sdim		    wlists[i].back)) == 1)
857296417Sdim			break;
858239462Sdim	}
859239462Sdim	*ep = c;
860239462Sdim	return rval;
861239462Sdim}
862239462Sdim
863239462Sdimstatic void
864239462Sdimgetderiv(size_t lev)
865239462Sdim{
866239462Sdim	if (deriv.maxlev < lev) {
867239462Sdim		void *p = realloc(deriv.buf, sizeof(*deriv.buf) * lev);
868341825Sdim		if (p == NULL)
869239462Sdim			err(1, "Cannot grow array");
870341825Sdim		deriv.buf = p;
871341825Sdim		deriv.maxlev = lev;
872296417Sdim	}
873239462Sdim}
874239462Sdim
875239462Sdim
/*
 * Print the command-line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n", prog);
	exit(1);
}
884239462Sdim