1341825Sdim/* $NetBSD: spellprog.c,v 1.7 2007/12/15 16:32:07 perry Exp $ */ 2218887Sdim 3218887Sdim/* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */ 4218887Sdim 5218887Sdim/* 6218887Sdim * Copyright (c) 1991, 1993 7218887Sdim * The Regents of the University of California. All rights reserved. 8218887Sdim * 9218887Sdim * Redistribution and use in source and binary forms, with or without 10218887Sdim * modification, are permitted provided that the following conditions 11218887Sdim * are met: 12218887Sdim * 1. Redistributions of source code must retain the above copyright 13218887Sdim * notice, this list of conditions and the following disclaimer. 14218887Sdim * 2. Redistributions in binary form must reproduce the above copyright 15218887Sdim * notice, this list of conditions and the following disclaimer in the 16341825Sdim * documentation and/or other materials provided with the distribution. 17341825Sdim * 3. Neither the name of the University nor the names of its contributors 18234353Sdim * may be used to endorse or promote products derived from this software 19218887Sdim * without specific prior written permission. 20261991Sdim * 21218887Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22341825Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23276479Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24218887Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25341825Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26249423Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27314564Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28249423Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29341825Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30341825Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31218887Sdim * SUCH DAMAGE. 32341825Sdim * 33341825Sdim * @(#)spell.h 8.1 (Berkeley) 6/6/93 34249423Sdim */ 35218887Sdim/* 36341825Sdim * Copyright (C) Caldera International Inc. 2001-2002. 37341825Sdim * All rights reserved. 38341825Sdim * 39249423Sdim * Redistribution and use in source and binary forms, with or without 40341825Sdim * modification, are permitted provided that the following conditions 41341825Sdim * are met: 42341825Sdim * 1. Redistributions of source code and documentation must retain the above 43341825Sdim * copyright notice, this list of conditions and the following disclaimer. 44341825Sdim * 2. Redistributions in binary form must reproduce the above copyright 45218887Sdim * notice, this list of conditions and the following disclaimer in the 46341825Sdim * documentation and/or other materials provided with the distribution. 47341825Sdim * 3. All advertising materials mentioning features or use of this software 48341825Sdim * must display the following acknowledgement: 49341825Sdim * This product includes software developed or owned by Caldera 50249423Sdim * International, Inc. 51341825Sdim * 4. Neither the name of Caldera International, Inc. nor the names of other 52234353Sdim * contributors may be used to endorse or promote products derived from 53341825Sdim * this software without specific prior written permission. 54249423Sdim * 55341825Sdim * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 56341825Sdim * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 57341825Sdim * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 58341825Sdim * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59341825Sdim * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 60341825Sdim * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61249423Sdim * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62341825Sdim * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63341825Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 64341825Sdim * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 65341825Sdim * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66276479Sdim * POSSIBILITY OF SUCH DAMAGE. 67218887Sdim */ 68341825Sdim 69341825Sdim#include <sys/cdefs.h> 70341825Sdim 71341825Sdim#ifndef lint 72218887Sdimstatic const char copyright[] = 73218887Sdim"@(#) Copyright (c) 1991, 1993\n\ 74218887Sdim The Regents of the University of California. All rights reserved.\n"; 75218887Sdim#endif /* not lint */ 76276479Sdim 77276479Sdim#ifndef lint 78249423Sdim#if 0 79249423Sdimstatic const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93"; 80249423Sdim#else 81249423Sdim#endif 82249423Sdimstatic const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $"; 83249423Sdim#endif /* not lint */ 84341825Sdim 85218887Sdim#include <sys/param.h> 86234353Sdim#include <sys/mman.h> 87234353Sdim#include <sys/stat.h> 88218887Sdim 89218887Sdim#include <ctype.h> 90218887Sdim#include <err.h> 91218887Sdim#include <errno.h> 92226633Sdim#include <fcntl.h> 93251662Sdim#include <limits.h> 94251662Sdim#include <locale.h> 95218887Sdim#include <stdio.h> 96218887Sdim#include <stdlib.h> 97276479Sdim#include <string.h> 98218887Sdim#include <unistd.h> 99218887Sdim#include <util.h> 100218887Sdim 101226633Sdim#include "extern.h" 102251662Sdim 103218887Sdim#define DLEV 2 104218887Sdim 105218887Sdimstatic int dict(char *, char *); 106218887Sdimstatic int trypref(char *, const char *, size_t); 107218887Sdimstatic int tryword(char *, char *, size_t); 108218887Sdimstatic int suffix(char *, size_t); 109234353Sdimstatic int vowel(int); 110234353Sdimstatic const char *lookuppref(char **, char *); 111234353Sdimstatic char *skipv(char *); 112243830Sdimstatic void ise(void); 113243830Sdimstatic void print_word(FILE *); 114243830Sdimstatic void ztos(char *); 115243830Sdimstatic int monosyl(char *, char *); 116314564Sdimstatic void usage(void) __dead; 117314564Sdimstatic void getderiv(size_t); 118314564Sdim 119243830Sdimstatic int an(char *, const char *, const char *, size_t); 120243830Sdimstatic int bility(char *, const char *, const char *, size_t); 121296417Sdimstatic int es(char *, const char *, const char *, size_t); 122243830Sdimstatic int i_to_y(char *, const char *, const char *, size_t); 123276479Sdimstatic int ily(char *, const char *, const char *, size_t); 124276479Sdimstatic int ize(char *, const char *, const char *, size_t); 125243830Sdimstatic int metry(char *, const char *, const char *, size_t); 126314564Sdimstatic int ncy(char *, const char *, const char *, size_t); 127296417Sdimstatic int nop(char *, const char *, const char *, size_t); 128243830Sdimstatic int s(char *, const char *, const char *, size_t); 129314564Sdimstatic int strip(char *, const char *, const char *, size_t); 130276479Sdimstatic int tion(char *, const char *, const char *, size_t); 131276479Sdimstatic int y_to_e(char *, const char *, const char *, size_t); 132243830Sdimstatic int CCe(char *, const char *, const char *, size_t); 133243830Sdimstatic int VCe(char *, const char *, const char *, size_t); 134243830Sdim 135243830Sdim/* 136243830Sdim * This cannot be const because we modify it when we choose british 137243830Sdim * spelling. 138243830Sdim */ 139243830Sdimstatic struct suftab { 140243830Sdim const char *suf; 141243830Sdim int (*p1)(char *, const char *, const char *, size_t); 142243830Sdim int n1; 143243830Sdim const char *d1; 144243830Sdim const char *a1; 145243830Sdim int (*p2)(char *, const char *, const char *, size_t); 146243830Sdim int n2; 147243830Sdim const char *d2; 148314564Sdim const char *a2; 149243830Sdim} suftab[] = { 150296417Sdim { .suf = "ssen", .p1 = ily, .n1 = 4, 151243830Sdim .d1 = "-y+iness", .a1 = "+ness" }, 152314564Sdim { .suf = "ssel", .p1 = ily, .n1 = 4, 153314564Sdim .d1 = "-y+i+less", .a1 = "+less" }, 154243830Sdim { .suf = "se", .p1 = s, .n1 = 1, 155314564Sdim .d1 = "", .a1 = "+s", .p2 = es, 156314564Sdim .n2 = 2, .d2 = "-y+ies", .a2 = "+es" }, 157243830Sdim { .suf = "s'", .p1 = s, .n1 = 2, 158314564Sdim .d1 = "", .a1 = "+'s" }, 159243830Sdim { .suf = "s", .p1 = s, .n1 = 1, 160314564Sdim .d1 = "", .a1 = "+s" }, 161243830Sdim { .suf = "ecn", .p1 = ncy, .n1 = 1, 162243830Sdim .d1 = "", .a1 = "-t+ce" }, 163296417Sdim { .suf = "ycn", .p1 = ncy, .n1 = 1, 164341825Sdim .d1 = "", .a1 = "-cy+t" }, 165276479Sdim { .suf = "ytilb", .p1 = nop, .n1 = 0, 166341825Sdim .d1 = "", .a1 = "" }, 167243830Sdim { .suf = "ytilib", .p1 = bility, .n1 = 5, 168243830Sdim .d1 = "-le+ility", .a1 = "" }, 169243830Sdim { .suf = "elbaif", .p1 = i_to_y, .n1 = 4, 170314564Sdim .d1 = "-y+iable", .a1 = "" }, 171314564Sdim { .suf = "elba", .p1 = CCe, .n1 = 4, 172314564Sdim .d1 = "-e+able", .a1 = "+able" }, 173243830Sdim { .suf = "yti", .p1 = CCe, .n1 = 3, 174243830Sdim .d1 = "-e+ity", .a1 = "+ity" }, 175243830Sdim { .suf = "ylb", .p1 = y_to_e, .n1 = 1, 176243830Sdim .d1 = "-e+y", .a1 = "" }, 177243830Sdim { .suf = "yl", .p1 = ily, .n1 = 2, 178243830Sdim .d1 = "-y+ily", .a1 = "+ly" }, 179314564Sdim { .suf = "laci", .p1 = strip, .n1 = 2, 180314564Sdim .d1 = "", .a1 = "+al" }, 181243830Sdim { .suf = "latnem", .p1 = strip, .n1 = 2, 182314564Sdim .d1 = "", .a1 = "+al" }, 183243830Sdim { .suf = "lanoi", .p1 = strip, .n1 = 2, 184243830Sdim .d1 = "", .a1 = "+al" }, 185243830Sdim { .suf = "tnem", .p1 = strip, .n1 = 4, 186251662Sdim .d1 = "", .a1 = "+ment" }, 187251662Sdim { .suf = "gni", .p1 = CCe, .n1 = 3, 188341825Sdim .d1 = "-e+ing", .a1 = "+ing" }, 189341825Sdim { .suf = "reta", .p1 = nop, .n1 = 0, 190251662Sdim .d1 = "", .a1 = "" }, 191234353Sdim { .suf = "re", .p1 = strip, .n1 = 1, 192234353Sdim .d1 = "", .a1 = "+r", .p2 = i_to_y, 193249423Sdim .n2 = 2, .d2 = "-y+ier", .a2 = "+er" }, 194251662Sdim { .suf = "de", .p1 = strip, .n1 = 1, 195341825Sdim .d1 = "", .a1 = "+d", .p2 = i_to_y, 196341825Sdim .n2 = 2, .d2 = "-y+ied", .a2 = "+ed" }, 197341825Sdim { .suf = "citsi", .p1 = strip, .n1 = 2, 198234353Sdim .d1 = "", .a1 = "+ic" }, 199296417Sdim { .suf = "cihparg", .p1 = i_to_y, .n1 = 1, 200234353Sdim .d1 = "-y+ic", .a1 = "" }, 201234353Sdim { .suf = "tse", .p1 = strip, .n1 = 2, 202234353Sdim .d1 = "", .a1 = "+st", .p2 = i_to_y, 203314564Sdim .n2 = 3, .d2 = "-y+iest",.a2 = "+est" }, 204234353Sdim { .suf = "cirtem", .p1 = i_to_y, .n1 = 1, 205296417Sdim .d1 = "-y+ic", .a1 = "" }, 206234353Sdim { .suf = "yrtem", .p1 = metry, .n1 = 0, 207234353Sdim .d1 = "-ry+er", .a1 = "" }, 208314564Sdim { .suf = "cigol", .p1 = i_to_y, .n1 = 1, 209243830Sdim .d1 = "-y+ic", .a1 = "" }, 210314564Sdim { .suf = "tsigol", .p1 = i_to_y, .n1 = 2, 211341825Sdim .d1 = "-y+ist", .a1 = "" }, 212341825Sdim { .suf = "tsi", .p1 = VCe, .n1 = 3, 213234353Sdim .d1 = "-e+ist", .a1 = "+ist" }, 214296417Sdim { .suf = "msi", .p1 = VCe, .n1 = 3, 215234353Sdim .d1 = "-e+ism", .a1 = "+ist" }, 216234353Sdim { .suf = "noitacif", .p1 = i_to_y, .n1 = 6, 217234353Sdim .d1 = "-y+ication", .a1 = "" }, 218234353Sdim { .suf = "noitazi", .p1 = ize, .n1 = 5, 219314564Sdim .d1 = "-e+ation", .a1 = "" }, 220341825Sdim { .suf = "rota", .p1 = tion, .n1 = 2, 221234353Sdim .d1 = "-e+or", .a1 = "" }, 222234353Sdim { .suf = "noit", .p1 = tion, .n1 = 3, 223234353Sdim .d1 = "-e+ion", .a1 = "+ion" }, 224234353Sdim { .suf = "naino", .p1 = an, .n1 = 3, 225234353Sdim .d1 = "", .a1 = "+ian" }, 226314564Sdim { .suf = "na", .p1 = an, .n1 = 1, 227296417Sdim .d1 = "", .a1 = "+n" }, 228234353Sdim { .suf = "evit", .p1 = tion, .n1 = 3, 229234353Sdim .d1 = "-e+ive", .a1 = "+ive" }, 230314564Sdim { .suf = "ezi", .p1 = CCe, .n1 = 3, 231234353Sdim .d1 = "-e+ize", .a1 = "+ize" }, 232234353Sdim { .suf = "pihs", .p1 = strip, .n1 = 4, 233234353Sdim .d1 = "", .a1 = "+ship" }, 234234353Sdim { .suf = "dooh", .p1 = ily, .n1 = 4, 235314564Sdim .d1 = "-y+hood", .a1 = "+hood" }, 236314564Sdim { .suf = "ekil", .p1 = strip, .n1 = 4, 237314564Sdim .d1 = "", .a1 = "+like" }, 238234353Sdim { .suf = NULL, } 239296417Sdim}; 240314564Sdim 241234353Sdimstatic const char *preftab[] = { 242296417Sdim "anti", 243234353Sdim "bio", 244234353Sdim "dis", 245234353Sdim "electro", 246261991Sdim "en", 247261991Sdim "fore", 248261991Sdim "hyper", 249261991Sdim "intra", 250261991Sdim "inter", 251261991Sdim "iso", 252261991Sdim "kilo", 253249423Sdim "magneto", 254296417Sdim "meta", 255276479Sdim "micro", 256276479Sdim "milli", 257276479Sdim "mis", 258341825Sdim "mono", 259341825Sdim "multi", 260249423Sdim "non", 261341825Sdim "out", 262249423Sdim "over", 263249423Sdim "photo", 264249423Sdim "poly", 265261991Sdim "pre", 266261991Sdim "pseudo", 267249423Sdim "re", 268261991Sdim "semi", 269249423Sdim "stereo", 270249423Sdim "sub", 271249423Sdim "super", 272249423Sdim "thermo", 273249423Sdim "ultra", 274249423Sdim "under", /* must precede un */ 275249423Sdim "un", 276261991Sdim NULL 277249423Sdim}; 278249423Sdim 279249423Sdimstatic struct wlist { 280249423Sdim int fd; 281249423Sdim unsigned char *front; 282249423Sdim unsigned char *back; 283249423Sdim} *wlists; 284249423Sdim 285249423Sdimstatic int vflag; 286261991Sdimstatic int xflag; 287261991Sdimstatic char word[LINE_MAX]; 288261991Sdimstatic char original[LINE_MAX]; 289261991Sdimstatic char affix[LINE_MAX]; 290261991Sdimstatic struct { 291314564Sdim const char **buf; 292261991Sdim size_t maxlev; 293261991Sdim} deriv; 294314564Sdim 295261991Sdim/* 296261991Sdim * The spellprog utility accepts a newline-delimited list of words 297314564Sdim * on stdin. For arguments it expects the path to a word list and 298261991Sdim * the path to a file in which to store found words. 299261991Sdim * 300261991Sdim * In normal usage, spell is called twice. The first time it is 301261991Sdim * called with a stop list to flag commonly mispelled words. The 302261991Sdim * remaining words are then passed to spell again, this time with 303261991Sdim * the dictionary file as the first (non-flag) argument. 304276479Sdim * 305261991Sdim * Unlike historic versions of spellprog, this one does not use 306314564Sdim * hashed files. Instead it simply requires that files be sorted 307314564Sdim * lexigraphically and uses the same algorithm as the look utility. 308261991Sdim * 309261991Sdim * Note that spellprog should be called via the spell shell script 310261991Sdim * and is not meant to be invoked directly by the user. 311261991Sdim */ 312261991Sdim 313261991Sdimint 314261991Sdimmain(int argc, char **argv) 315261991Sdim{ 316261991Sdim char *ep, *cp, *dp; 317261991Sdim char *outfile; 318261991Sdim int ch, fold, i; 319261991Sdim struct stat sb; 320261991Sdim FILE *file, *found; 321261991Sdim 322261991Sdim setlocale(LC_ALL, ""); 323261991Sdim 324261991Sdim outfile = NULL; 325314564Sdim while ((ch = getopt(argc, argv, "bvxo:")) != -1) { 326261991Sdim switch (ch) { 327261991Sdim case 'b': 328314564Sdim /* Use British dictionary and convert ize -> ise. */ 329261991Sdim ise(); 330261991Sdim break; 331261991Sdim case 'o': 332261991Sdim outfile = optarg; 333261991Sdim break; 334261991Sdim case 'v': 335261991Sdim /* Also write derivations to "found" file. */ 336261991Sdim vflag++; 337261991Sdim break; 338261991Sdim case 'x': 339261991Sdim /* Print plausible stems to stdout. */ 340234353Sdim xflag++; 341218887Sdim break; 342218887Sdim default: 343218887Sdim usage(); 344218887Sdim } 345218887Sdim 346218887Sdim } 347218887Sdim argc -= optind; 348226633Sdim argv += optind; 349341825Sdim if (argc < 1) 350218887Sdim usage(); 351234353Sdim 352296417Sdim /* Open and mmap the word/stop lists. */ 353218887Sdim if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL) 354249423Sdim err(1, "malloc"); 355226633Sdim 356341825Sdim for (i = 0; argc--; i++) { 357341825Sdim wlists[i].fd = open(argv[i], O_RDONLY, 0); 358218887Sdim if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) 359226633Sdim err(1, "%s", argv[i]); 360218887Sdim if (sb.st_size > SIZE_T_MAX) 361226633Sdim errx(1, "%s: %s", argv[i], strerror(EFBIG)); 362226633Sdim wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, 363218887Sdim MAP_PRIVATE, wlists[i].fd, (off_t)0); 364226633Sdim if (wlists[i].front == MAP_FAILED) 365226633Sdim err(1, "%s", argv[i]); 366218887Sdim wlists[i].back = wlists[i].front + (size_t)sb.st_size; 367296417Sdim } 368234353Sdim wlists[i].fd = -1; 369218887Sdim 370218887Sdim /* Open file where found words are to be saved. */ 371218887Sdim if (outfile == NULL) 372218887Sdim found = NULL; 373218887Sdim else if ((found = fopen(outfile, "w")) == NULL) 374218887Sdim err(1, "cannot open %s", outfile); 375218887Sdim 376226633Sdim for (;; print_word(file)) { 377341825Sdim affix[0] = '\0'; 378218887Sdim file = found; 379218887Sdim for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { 380218887Sdim if (ep - word == sizeof(word) - 1) { 381218887Sdim *ep = '\0'; 382218887Sdim warnx("word too long (%s)", word); 383218887Sdim while ((ch = getchar()) != '\n') 384218887Sdim ; /* slurp until EOL */ 385341825Sdim } 386341825Sdim if (ch == EOF) { 387218887Sdim if (found != NULL) 388226633Sdim fclose(found); 389251662Sdim exit(0); 390234353Sdim } 391218887Sdim } 392226633Sdim for (cp = word, dp = original; cp < ep; ) 393226633Sdim *dp++ = *cp++; 394218887Sdim *dp = '\0'; 395218887Sdim fold = 0; 396218887Sdim for (cp = word; cp < ep; cp++) 397226633Sdim if (islower((unsigned char)*cp)) 398226633Sdim goto lcase; 399218887Sdim if (trypref(ep, ".", 0)) 400218887Sdim continue; 401218887Sdim ++fold; 402218887Sdim for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) 403218887Sdim *dp = tolower((unsigned char)*cp); 404218887Sdimlcase: 405218887Sdim if (trypref(ep, ".", 0) || suffix(ep, 0)) 406218887Sdim continue; 407226633Sdim if (isupper((unsigned char)word[0])) { 408218887Sdim for (cp = original, dp = word; (*dp = *cp++); dp++) { 409218887Sdim if (fold) 410218887Sdim *dp = tolower((unsigned char)*dp); 411218887Sdim } 412218887Sdim word[0] = tolower((unsigned char)word[0]); 413218887Sdim goto lcase; 414218887Sdim } 415218887Sdim file = stdout; 416218887Sdim } 417218887Sdim} 418218887Sdim 419218887Sdimstatic void 420218887Sdimprint_word(FILE *f) 421218887Sdim{ 422218887Sdim 423218887Sdim if (f != NULL) { 424218887Sdim if (vflag && affix[0] != '\0' && affix[0] != '.') 425218887Sdim fprintf(f, "%s\t%s\n", affix, original); 426261991Sdim else 427218887Sdim fprintf(f, "%s\n", original); 428261991Sdim } 429218887Sdim} 430218887Sdim 431261991Sdim/* 432276479Sdim * For each matching suffix in suftab, call the function associated 433218887Sdim * with that suffix (p1 and p2). 434261991Sdim */ 435261991Sdimstatic int 436261991Sdimsuffix(char *ep, size_t lev) 437261991Sdim{ 438261991Sdim const struct suftab *t; 439261991Sdim char *cp; 440261991Sdim const char *sp; 441261991Sdim 442261991Sdim lev += DLEV; 443261991Sdim getderiv(lev + 1); 444218887Sdim deriv.buf[lev] = deriv.buf[lev - 1] = 0; 445276479Sdim for (t = suftab; (sp = t->suf) != NULL; t++) { 446218887Sdim cp = ep; 447218887Sdim while (*sp) { 448261991Sdim if (*--cp != *sp++) 449261991Sdim goto next; 450261991Sdim } 451261991Sdim for (sp = cp; --sp >= word && !vowel(*sp);) 452341825Sdim ; /* nothing */ 453218887Sdim if (sp < word) 454261991Sdim return 0; 455218887Sdim if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1)) 456218887Sdim return 1; 457341825Sdim if (t->p2 != NULL) { 458218887Sdim deriv.buf[lev] = deriv.buf[lev + 1] = '\0'; 459261991Sdim return (*t->p2)(ep - t->n2, t->d2, t->a2, lev); 460218887Sdim } 461218887Sdim return 0; 462218887Sdimnext: ; 463218887Sdim } 464226633Sdim return 0; 465218887Sdim} 466218887Sdim 467218887Sdimstatic int 468261991Sdim/*ARGSUSED*/ 469226633Sdimnop(char *ep, const char *d, const char *a, size_t lev) 470218887Sdim{ 471226633Sdim 472218887Sdim return 0; 473218887Sdim} 474218887Sdim 475218887Sdimstatic int 476261991Sdim/*ARGSUSED*/ 477261991Sdimstrip(char *ep, const char *d, const char *a, size_t lev) 478226633Sdim{ 479218887Sdim 480226633Sdim return trypref(ep, a, lev) || suffix(ep, lev); 481261991Sdim} 482261991Sdim 483261991Sdimstatic int 484261991Sdims(char *ep, const char *d, const char *a, const size_t lev) 485218887Sdim{ 486226633Sdim 487218887Sdim if (lev > DLEV + 1) 488218887Sdim return 0; 489226633Sdim if (*ep == 's' && ep[-1] == 's') 490218887Sdim return 0; 491218887Sdim return strip(ep, d, a, lev); 492218887Sdim} 493226633Sdim 494218887Sdimstatic int 495218887Sdim/*ARGSUSED*/ 496218887Sdiman(char *ep, const char *d, const char *a, size_t lev) 497226633Sdim{ 498218887Sdim 499218887Sdim if (!isupper((unsigned char)*word)) /* must be proper name */ 500218887Sdim return 0; 501226633Sdim return trypref(ep, a, lev); 502218887Sdim} 503218887Sdim 504218887Sdimstatic int 505218887Sdim/*ARGSUSED*/ 506218887Sdimize(char *ep, const char *d, const char *a, size_t lev) 507218887Sdim{ 508218887Sdim 509218887Sdim *ep++ = 'e'; 510218887Sdim return strip(ep ,"", d, lev); 511218887Sdim} 512226633Sdim 513218887Sdimstatic int 514218887Sdim/*ARGSUSED*/ 515261991Sdimy_to_e(char *ep, const char *d, const char *a, size_t lev) 516261991Sdim{ 517261991Sdim char c = *ep; 518261991Sdim 519261991Sdim *ep++ = 'e'; 520261991Sdim if (strip(ep, "", d, lev)) 521261991Sdim return 1; 522218887Sdim ep[-1] = c; 523234353Sdim return 0; 524218887Sdim} 525341825Sdim 526341825Sdimstatic int 527341825Sdimily(char *ep, const char *d, const char *a, size_t lev) 528218887Sdim{ 529314564Sdim 530234353Sdim if (ep[-1] == 'i') 531234353Sdim return i_to_y(ep, d, a, lev); 532234353Sdim else 533341825Sdim return strip(ep, d, a, lev); 534234353Sdim} 535341825Sdim 536341825Sdimstatic int 537341825Sdimncy(char *ep, const char *d, const char *a, size_t lev) 538234353Sdim{ 539218887Sdim 540234353Sdim if (skipv(skipv(ep - 1)) < word) 541234353Sdim return 0; 542234353Sdim ep[-1] = 't'; 543234353Sdim return strip(ep, d, a, lev); 544234353Sdim} 545234353Sdim 546218887Sdimstatic int 547218887Sdimbility(char *ep, const char *d, const char *a, size_t lev) 548218887Sdim{ 549234353Sdim 550218887Sdim *ep++ = 'l'; 551218887Sdim return y_to_e(ep, d, a, lev); 552341825Sdim} 553341825Sdim 554341825Sdimstatic int 555341825Sdimi_to_y(char *ep, const char *d, const char *a, size_t lev) 556341825Sdim{ 557341825Sdim 558341825Sdim if (ep[-1] == 'i') { 559341825Sdim ep[-1] = 'y'; 560341825Sdim a = d; 561341825Sdim } 562341825Sdim return strip(ep, "", a, lev); 563341825Sdim} 564234353Sdim 565341825Sdimstatic int 566341825Sdimes(char *ep, const char *d, const char *a, size_t lev) 567234353Sdim{ 568341825Sdim 569341825Sdim if (lev > DLEV) 570341825Sdim return 0; 571341825Sdim 572341825Sdim switch (ep[-1]) { 573341825Sdim default: 574341825Sdim return 0; 575341825Sdim case 'i': 576341825Sdim return i_to_y(ep, d, a, lev); 577341825Sdim case 's': 578218887Sdim case 'h': 579341825Sdim case 'z': 580341825Sdim case 'x': 581341825Sdim return strip(ep, d, a, lev); 582341825Sdim } 583239462Sdim} 584341825Sdim 585341825Sdimstatic int 586239462Sdimmetry(char *ep, const char *d, const char *a, size_t lev) 587341825Sdim{ 588341825Sdim 589341825Sdim ep[-2] = 'e'; 590341825Sdim ep[-1] = 'r'; 591243830Sdim return strip(ep, d, a, lev); 592341825Sdim} 593341825Sdim 594341825Sdimstatic int 595243830Sdimtion(char *ep, const char *d, const char *a, size_t lev) 596234353Sdim{ 597218887Sdim 598341825Sdim switch (ep[-2]) { 599341825Sdim case 'c': 600218887Sdim case 'r': 601341825Sdim return trypref(ep, a, lev); 602341825Sdim case 'a': 603218887Sdim return y_to_e(ep, d, a, lev); 604341825Sdim } 605341825Sdim return 0; 606341825Sdim} 607341825Sdim 608218887Sdim/* 609341825Sdim * Possible consonant-consonant-e ending. 610341825Sdim */ 611218887Sdimstatic int 612218887SdimCCe(char *ep, const char *d, const char *a, size_t lev) 613218887Sdim{ 614341825Sdim 615341825Sdim switch (ep[-1]) { 616341825Sdim case 'l': 617341825Sdim if (vowel(ep[-2])) 618341825Sdim break; 619341825Sdim switch (ep[-2]) { 620341825Sdim case 'l': 621341825Sdim case 'r': 622341825Sdim case 'w': 623218887Sdim break; 624218887Sdim default: 625218887Sdim return y_to_e(ep, d, a, lev); 626341825Sdim } 627341825Sdim break; 628341825Sdim case 's': 629341825Sdim if (ep[-2] == 's') 630341825Sdim break; 631341825Sdim /*FALLTHROUGH*/ 632341825Sdim case 'c': 633341825Sdim case 'g': 634341825Sdim if (*ep == 'a') 635341825Sdim return 0; 636341825Sdim /*FALLTHROUGH*/ 637341825Sdim case 'v': 638341825Sdim case 'z': 639218887Sdim if (vowel(ep[-2])) 640341825Sdim break; 641341825Sdim /*FALLTHROUGH*/ 642341825Sdim case 'u': 643218887Sdim if (y_to_e(ep, d, a, lev)) 644341825Sdim return 1; 645341825Sdim if (!(ep[-2] == 'n' && ep[-1] == 'g')) 646341825Sdim return 0; 647341825Sdim } 648341825Sdim return VCe(ep, d, a, lev); 649341825Sdim} 650341825Sdim 651341825Sdim/* 652341825Sdim * Possible consonant-vowel-consonant-e ending. 653341825Sdim */ 654341825Sdimstatic int 655341825SdimVCe(char *ep, const char *d, const char *a, size_t lev) 656341825Sdim{ 657341825Sdim char c; 658251662Sdim 659341825Sdim c = ep[-1]; 660341825Sdim if (c == 'e') 661341825Sdim return 0; 662341825Sdim if (!vowel(c) && vowel(ep[-2])) { 663341825Sdim c = *ep; 664341825Sdim *ep++ = 'e'; 665341825Sdim if (trypref(ep, d, lev) || suffix(ep, lev)) 666341825Sdim return 1; 667341825Sdim ep--; 668251662Sdim *ep = c; 669251662Sdim } 670341825Sdim return strip(ep, d, a, lev); 671341825Sdim} 672251662Sdim 673251662Sdimstatic const char * 674341825Sdimlookuppref(char **wp, char *ep) 675341825Sdim{ 676341825Sdim const char **sp, *cp; 677341825Sdim char *bp; 678341825Sdim 679341825Sdim for (sp = preftab; *sp; sp++) { 680341825Sdim bp = *wp; 681341825Sdim for (cp = *sp; *cp; cp++, bp++) { 682341825Sdim if (tolower((unsigned char)*bp) != *cp) 683341825Sdim goto next; 684218887Sdim } 685341825Sdim for (cp = bp; cp < ep; cp++) { 686341825Sdim if (vowel(*cp)) { 687341825Sdim *wp = bp; 688341825Sdim return *sp; 689218887Sdim } 690341825Sdim } 691341825Sdimnext: ; 692341825Sdim } 693341825Sdim return 0; 694341825Sdim} 695218887Sdim 696218887Sdim/* 697341825Sdim * If the word is not in the dictionary, try stripping off prefixes 698341825Sdim * until the word is found or we run out of prefixes to check. 699341825Sdim */ 700341825Sdimstatic int 701218887Sdimtrypref(char *ep, const char *a, size_t lev) 702218887Sdim{ 703341825Sdim const char *cp; 704341825Sdim char *bp; 705341825Sdim char *pp; 706341825Sdim int val = 0; 707341825Sdim char space[20]; 708341825Sdim 709341825Sdim getderiv(lev + 2); 710341825Sdim deriv.buf[lev] = a; 711218887Sdim if (tryword(word, ep, lev)) 712218887Sdim return 1; 713341825Sdim bp = word; 714341825Sdim pp = space; 715341825Sdim deriv.buf[lev + 1] = pp; 716341825Sdim while ((cp = lookuppref(&bp, ep)) != NULL) { 717341825Sdim *pp++ = '+'; 718341825Sdim while ((*pp = *cp++)) 719296417Sdim pp++; 720341825Sdim if (tryword(bp, ep, lev + 1)) { 721341825Sdim val = 1; 722341825Sdim break; 723341825Sdim } 724218887Sdim if (pp - space >= sizeof(space)) 725341825Sdim return 0; 726218887Sdim } 727341825Sdim deriv.buf[lev + 1] = deriv.buf[lev + 2] = '\0'; 728341825Sdim return val; 729218887Sdim} 730341825Sdim 731341825Sdimstatic int 732341825Sdimtryword(char *bp, char *ep, size_t lev) 733218887Sdim{ 734218887Sdim size_t i, j; 735341825Sdim char duple[3]; 736341825Sdim 737341825Sdim if (ep-bp <= 1) 738341825Sdim return 0; 739218887Sdim if (vowel(*ep) && monosyl(bp, ep)) 740341825Sdim return 0; 741341825Sdim 742341825Sdim i = dict(bp, ep); 743341825Sdim if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && 744218887Sdim monosyl(bp, ep - 1)) { 745218887Sdim ep--; 746341825Sdim getderiv(++lev); 747341825Sdim deriv.buf[lev] = duple; 748341825Sdim duple[0] = '+'; 749341825Sdim duple[1] = *ep; 750218887Sdim duple[2] = '\0'; 751341825Sdim i = dict(bp, ep); 752341825Sdim } 753296417Sdim if (vflag == 0 || i == 0) 754341825Sdim return i; 755341825Sdim 756218887Sdim /* Also tack on possible derivations. (XXX - warn on truncation?) */ 757341825Sdim for (j = lev; j > 0; j--) { 758341825Sdim if (deriv.buf[j]) 759341825Sdim (void)strlcat(affix, deriv.buf[j], sizeof(affix)); 760341825Sdim } 761341825Sdim return i; 762218887Sdim} 763341825Sdim 764341825Sdimstatic int 765218887Sdimmonosyl(char *bp, char *ep) 766341825Sdim{ 767341825Sdim 768341825Sdim if (ep < bp + 2) 769341825Sdim return 0; 770341825Sdim if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 771218887Sdim return 0; 772341825Sdim while (--ep >= bp) 773341825Sdim if (vowel(*ep)) 774341825Sdim return 0; 775341825Sdim return 1; 776341825Sdim} 777218887Sdim 778341825Sdimstatic char * 779341825Sdimskipv(char *st) 780341825Sdim{ 781341825Sdim 782218887Sdim if (st >= word && vowel(*st)) 783341825Sdim st--; 784341825Sdim while (st >= word && !vowel(*st)) 785341825Sdim st--; 786341825Sdim return st; 787341825Sdim} 788341825Sdim 789341825Sdimstatic int 790218887Sdimvowel(int c) 791341825Sdim{ 792341825Sdim 793341825Sdim switch (tolower(c)) { 794218887Sdim case 'a': 795218887Sdim case 'e': 796341825Sdim case 'i': 797218887Sdim case 'o': 798341825Sdim case 'u': 799341825Sdim case 'y': 800218887Sdim return 1; 801341825Sdim } 802341825Sdim return 0; 803218887Sdim} 804341825Sdim 805341825Sdim/* 806341825Sdim * Crummy way to Britishise. 807341825Sdim */ 808218887Sdimstatic void 809341825Sdimise(void) 810341825Sdim{ 811341825Sdim struct suftab *tab; 812218887Sdim char *cp; 813341825Sdim 814218887Sdim for (tab = suftab; tab->suf; tab++) { 815218887Sdim /* Assume that suffix will contain 'z' if a1 or d1 do */ 816218887Sdim if (strchr(tab->suf, 'z')) { 817218887Sdim tab->suf = cp = estrdup(tab->suf); 818239462Sdim ztos(cp); 819239462Sdim if (strchr(tab->d1, 'z')) { 820239462Sdim tab->d1 = cp = estrdup(tab->d1); 821239462Sdim ztos(cp); 822239462Sdim } 823239462Sdim if (strchr(tab->a1, 'z')) { 824239462Sdim tab->a1 = cp = estrdup(tab->a1); 825239462Sdim ztos(cp); 826239462Sdim } 827239462Sdim } 828239462Sdim } 829341825Sdim} 830239462Sdim 831239462Sdimstatic void 832239462Sdimztos(char *st) 833239462Sdim{ 834239462Sdim 835239462Sdim for (; *st; st++) 836239462Sdim if (*st == 'z') 837239462Sdim *st = 's'; 838239462Sdim} 839296417Sdim 840239462Sdim/* 841239462Sdim * Look up a word in the dictionary. 842239462Sdim * Returns 1 if found, 0 if not. 843239462Sdim */ 844239462Sdimstatic int 845239462Sdimdict(char *bp, char *ep) 846239462Sdim{ 847288943Sdim char c; 848341825Sdim int i, rval; 849239462Sdim 850239462Sdim c = *ep; 851239462Sdim *ep = '\0'; 852239462Sdim if (xflag) 853239462Sdim printf("=%s\n", bp); 854276479Sdim for (i = rval = 0; wlists[i].fd != -1; i++) { 855239462Sdim if ((rval = look((unsigned char *)bp, wlists[i].front, 856239462Sdim wlists[i].back)) == 1) 857296417Sdim break; 858239462Sdim } 859239462Sdim *ep = c; 860239462Sdim return rval; 861239462Sdim} 862239462Sdim 863239462Sdimstatic void 864239462Sdimgetderiv(size_t lev) 865239462Sdim{ 866239462Sdim if (deriv.maxlev < lev) { 867239462Sdim void *p = realloc(deriv.buf, sizeof(*deriv.buf) * lev); 868341825Sdim if (p == NULL) 869239462Sdim err(1, "Cannot grow array"); 870341825Sdim deriv.buf = p; 871341825Sdim deriv.maxlev = lev; 872296417Sdim } 873239462Sdim} 874239462Sdim 875239462Sdim 876239462Sdimstatic void 877239462Sdimusage(void) 878239462Sdim{ 879239462Sdim (void)fprintf(stderr, 880239462Sdim "Usage: %s [-bvx] [-o found-words] word-list ...\n", 881239462Sdim getprogname()); 882239462Sdim exit(1); 883239462Sdim} 884239462Sdim