/*	$NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $	*/

/* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */

/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spell.h	8.1 (Berkeley) 6/6/93
 */
/*
 * Copyright (C) Caldera International Inc.  2001-2002.
 * All rights reserved.
38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code and documentation must retain the above 43 * copyright notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed or owned by Caldera 50 * International, Inc. 51 * 4. Neither the name of Caldera International, Inc. nor the names of other 52 * contributors may be used to endorse or promote products derived from 53 * this software without specific prior written permission. 54 * 55 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 56 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 58 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 60 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 65 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69#include <sys/cdefs.h> 70 71#ifndef lint 72static const char copyright[] = 73"@(#) Copyright (c) 1991, 1993\n\ 74 The Regents of the University of California. 
All rights reserved.\n"; 75#endif /* not lint */ 76 77#ifndef lint 78#if 0 79static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93"; 80#else 81#endif 82static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $"; 83#endif /* not lint */ 84 85#include <sys/param.h> 86#include <sys/mman.h> 87#include <sys/stat.h> 88 89#include <ctype.h> 90#include <err.h> 91#include <errno.h> 92#include <fcntl.h> 93#include <limits.h> 94#include <locale.h> 95#include <stdio.h> 96#include <stdlib.h> 97#include <string.h> 98#include <unistd.h> 99#include <util.h> 100 101#include "extern.h" 102 103#define DLEV 2 104 105static int dict(char *, char *); 106static int trypref(char *, const char *, size_t); 107static int tryword(char *, char *, size_t); 108static int suffix(char *, size_t); 109static int vowel(int); 110static const char *lookuppref(char **, char *); 111static char *skipv(char *); 112static void ise(void); 113static void print_word(FILE *); 114static void ztos(char *); 115static int monosyl(char *, char *); 116static void usage(void) __dead; 117static void getderiv(size_t); 118 119static int an(char *, const char *, const char *, size_t); 120static int bility(char *, const char *, const char *, size_t); 121static int es(char *, const char *, const char *, size_t); 122static int i_to_y(char *, const char *, const char *, size_t); 123static int ily(char *, const char *, const char *, size_t); 124static int ize(char *, const char *, const char *, size_t); 125static int metry(char *, const char *, const char *, size_t); 126static int ncy(char *, const char *, const char *, size_t); 127static int nop(char *, const char *, const char *, size_t); 128static int s(char *, const char *, const char *, size_t); 129static int strip(char *, const char *, const char *, size_t); 130static int tion(char *, const char *, const char *, size_t); 131static int y_to_e(char *, const char *, const char *, size_t); 132static int CCe(char *, const char *, 
const char *, size_t); 133static int VCe(char *, const char *, const char *, size_t); 134 135/* 136 * This cannot be const because we modify it when we choose british 137 * spelling. 138 */ 139static struct suftab { 140 const char *suf; 141 int (*p1)(char *, const char *, const char *, size_t); 142 int n1; 143 const char *d1; 144 const char *a1; 145 int (*p2)(char *, const char *, const char *, size_t); 146 int n2; 147 const char *d2; 148 const char *a2; 149} suftab[] = { 150 { .suf = "ssen", .p1 = ily, .n1 = 4, 151 .d1 = "-y+iness", .a1 = "+ness" }, 152 { .suf = "ssel", .p1 = ily, .n1 = 4, 153 .d1 = "-y+i+less", .a1 = "+less" }, 154 { .suf = "se", .p1 = s, .n1 = 1, 155 .d1 = "", .a1 = "+s", .p2 = es, 156 .n2 = 2, .d2 = "-y+ies", .a2 = "+es" }, 157 { .suf = "s'", .p1 = s, .n1 = 2, 158 .d1 = "", .a1 = "+'s" }, 159 { .suf = "s", .p1 = s, .n1 = 1, 160 .d1 = "", .a1 = "+s" }, 161 { .suf = "ecn", .p1 = ncy, .n1 = 1, 162 .d1 = "", .a1 = "-t+ce" }, 163 { .suf = "ycn", .p1 = ncy, .n1 = 1, 164 .d1 = "", .a1 = "-cy+t" }, 165 { .suf = "ytilb", .p1 = nop, .n1 = 0, 166 .d1 = "", .a1 = "" }, 167 { .suf = "ytilib", .p1 = bility, .n1 = 5, 168 .d1 = "-le+ility", .a1 = "" }, 169 { .suf = "elbaif", .p1 = i_to_y, .n1 = 4, 170 .d1 = "-y+iable", .a1 = "" }, 171 { .suf = "elba", .p1 = CCe, .n1 = 4, 172 .d1 = "-e+able", .a1 = "+able" }, 173 { .suf = "yti", .p1 = CCe, .n1 = 3, 174 .d1 = "-e+ity", .a1 = "+ity" }, 175 { .suf = "ylb", .p1 = y_to_e, .n1 = 1, 176 .d1 = "-e+y", .a1 = "" }, 177 { .suf = "yl", .p1 = ily, .n1 = 2, 178 .d1 = "-y+ily", .a1 = "+ly" }, 179 { .suf = "laci", .p1 = strip, .n1 = 2, 180 .d1 = "", .a1 = "+al" }, 181 { .suf = "latnem", .p1 = strip, .n1 = 2, 182 .d1 = "", .a1 = "+al" }, 183 { .suf = "lanoi", .p1 = strip, .n1 = 2, 184 .d1 = "", .a1 = "+al" }, 185 { .suf = "tnem", .p1 = strip, .n1 = 4, 186 .d1 = "", .a1 = "+ment" }, 187 { .suf = "gni", .p1 = CCe, .n1 = 3, 188 .d1 = "-e+ing", .a1 = "+ing" }, 189 { .suf = "reta", .p1 = nop, .n1 = 0, 190 .d1 = "", .a1 = "" }, 191 { 
.suf = "re", .p1 = strip, .n1 = 1, 192 .d1 = "", .a1 = "+r", .p2 = i_to_y, 193 .n2 = 2, .d2 = "-y+ier", .a2 = "+er" }, 194 { .suf = "de", .p1 = strip, .n1 = 1, 195 .d1 = "", .a1 = "+d", .p2 = i_to_y, 196 .n2 = 2, .d2 = "-y+ied", .a2 = "+ed" }, 197 { .suf = "citsi", .p1 = strip, .n1 = 2, 198 .d1 = "", .a1 = "+ic" }, 199 { .suf = "cihparg", .p1 = i_to_y, .n1 = 1, 200 .d1 = "-y+ic", .a1 = "" }, 201 { .suf = "tse", .p1 = strip, .n1 = 2, 202 .d1 = "", .a1 = "+st", .p2 = i_to_y, 203 .n2 = 3, .d2 = "-y+iest",.a2 = "+est" }, 204 { .suf = "cirtem", .p1 = i_to_y, .n1 = 1, 205 .d1 = "-y+ic", .a1 = "" }, 206 { .suf = "yrtem", .p1 = metry, .n1 = 0, 207 .d1 = "-ry+er", .a1 = "" }, 208 { .suf = "cigol", .p1 = i_to_y, .n1 = 1, 209 .d1 = "-y+ic", .a1 = "" }, 210 { .suf = "tsigol", .p1 = i_to_y, .n1 = 2, 211 .d1 = "-y+ist", .a1 = "" }, 212 { .suf = "tsi", .p1 = VCe, .n1 = 3, 213 .d1 = "-e+ist", .a1 = "+ist" }, 214 { .suf = "msi", .p1 = VCe, .n1 = 3, 215 .d1 = "-e+ism", .a1 = "+ist" }, 216 { .suf = "noitacif", .p1 = i_to_y, .n1 = 6, 217 .d1 = "-y+ication", .a1 = "" }, 218 { .suf = "noitazi", .p1 = ize, .n1 = 5, 219 .d1 = "-e+ation", .a1 = "" }, 220 { .suf = "rota", .p1 = tion, .n1 = 2, 221 .d1 = "-e+or", .a1 = "" }, 222 { .suf = "noit", .p1 = tion, .n1 = 3, 223 .d1 = "-e+ion", .a1 = "+ion" }, 224 { .suf = "naino", .p1 = an, .n1 = 3, 225 .d1 = "", .a1 = "+ian" }, 226 { .suf = "na", .p1 = an, .n1 = 1, 227 .d1 = "", .a1 = "+n" }, 228 { .suf = "evit", .p1 = tion, .n1 = 3, 229 .d1 = "-e+ive", .a1 = "+ive" }, 230 { .suf = "ezi", .p1 = CCe, .n1 = 3, 231 .d1 = "-e+ize", .a1 = "+ize" }, 232 { .suf = "pihs", .p1 = strip, .n1 = 4, 233 .d1 = "", .a1 = "+ship" }, 234 { .suf = "dooh", .p1 = ily, .n1 = 4, 235 .d1 = "-y+hood", .a1 = "+hood" }, 236 { .suf = "ekil", .p1 = strip, .n1 = 4, 237 .d1 = "", .a1 = "+like" }, 238 { .suf = NULL, } 239}; 240 241static const char *preftab[] = { 242 "anti", 243 "bio", 244 "dis", 245 "electro", 246 "en", 247 "fore", 248 "hyper", 249 "intra", 250 "inter", 251 
"iso", 252 "kilo", 253 "magneto", 254 "meta", 255 "micro", 256 "milli", 257 "mis", 258 "mono", 259 "multi", 260 "non", 261 "out", 262 "over", 263 "photo", 264 "poly", 265 "pre", 266 "pseudo", 267 "re", 268 "semi", 269 "stereo", 270 "sub", 271 "super", 272 "thermo", 273 "ultra", 274 "under", /* must precede un */ 275 "un", 276 NULL 277}; 278 279static struct wlist { 280 int fd; 281 unsigned char *front; 282 unsigned char *back; 283} *wlists; 284 285static int vflag; 286static int xflag; 287static char word[LINE_MAX]; 288static char original[LINE_MAX]; 289static char affix[LINE_MAX]; 290static struct { 291 const char **buf; 292 size_t maxlev; 293} deriv; 294 295/* 296 * The spellprog utility accepts a newline-delimited list of words 297 * on stdin. For arguments it expects the path to a word list and 298 * the path to a file in which to store found words. 299 * 300 * In normal usage, spell is called twice. The first time it is 301 * called with a stop list to flag commonly mispelled words. The 302 * remaining words are then passed to spell again, this time with 303 * the dictionary file as the first (non-flag) argument. 304 * 305 * Unlike historic versions of spellprog, this one does not use 306 * hashed files. Instead it simply requires that files be sorted 307 * lexigraphically and uses the same algorithm as the look utility. 308 * 309 * Note that spellprog should be called via the spell shell script 310 * and is not meant to be invoked directly by the user. 311 */ 312 313int 314main(int argc, char **argv) 315{ 316 char *ep, *cp, *dp; 317 char *outfile; 318 int ch, fold, i; 319 struct stat sb; 320 FILE *file, *found; 321 322 setlocale(LC_ALL, ""); 323 324 outfile = NULL; 325 while ((ch = getopt(argc, argv, "bvxo:")) != -1) { 326 switch (ch) { 327 case 'b': 328 /* Use British dictionary and convert ize -> ise. */ 329 ise(); 330 break; 331 case 'o': 332 outfile = optarg; 333 break; 334 case 'v': 335 /* Also write derivations to "found" file. 
*/ 336 vflag++; 337 break; 338 case 'x': 339 /* Print plausible stems to stdout. */ 340 xflag++; 341 break; 342 default: 343 usage(); 344 } 345 346 } 347 argc -= optind; 348 argv += optind; 349 if (argc < 1) 350 usage(); 351 352 /* Open and mmap the word/stop lists. */ 353 if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL) 354 err(1, "malloc"); 355 356 for (i = 0; argc--; i++) { 357 wlists[i].fd = open(argv[i], O_RDONLY, 0); 358 if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) 359 err(1, "%s", argv[i]); 360 if (sb.st_size > SIZE_T_MAX) 361 errx(1, "%s: %s", argv[i], strerror(EFBIG)); 362 wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, 363 MAP_PRIVATE, wlists[i].fd, (off_t)0); 364 if (wlists[i].front == MAP_FAILED) 365 err(1, "%s", argv[i]); 366 wlists[i].back = wlists[i].front + (size_t)sb.st_size; 367 } 368 wlists[i].fd = -1; 369 370 /* Open file where found words are to be saved. */ 371 if (outfile == NULL) 372 found = NULL; 373 else if ((found = fopen(outfile, "w")) == NULL) 374 err(1, "cannot open %s", outfile); 375 376 for (;; print_word(file)) { 377 affix[0] = '\0'; 378 file = found; 379 for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { 380 if (ep - word == sizeof(word) - 1) { 381 *ep = '\0'; 382 warnx("word too long (%s)", word); 383 while ((ch = getchar()) != '\n') 384 ; /* slurp until EOL */ 385 } 386 if (ch == EOF) { 387 if (found != NULL) 388 fclose(found); 389 exit(0); 390 } 391 } 392 for (cp = word, dp = original; cp < ep; ) 393 *dp++ = *cp++; 394 *dp = '\0'; 395 fold = 0; 396 for (cp = word; cp < ep; cp++) 397 if (islower((unsigned char)*cp)) 398 goto lcase; 399 if (trypref(ep, ".", 0)) 400 continue; 401 ++fold; 402 for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) 403 *dp = tolower((unsigned char)*cp); 404lcase: 405 if (trypref(ep, ".", 0) || suffix(ep, 0)) 406 continue; 407 if (isupper((unsigned char)word[0])) { 408 for (cp = original, dp = word; (*dp = *cp++); dp++) { 409 if (fold) 410 *dp = 
tolower((unsigned char)*dp); 411 } 412 word[0] = tolower((unsigned char)word[0]); 413 goto lcase; 414 } 415 file = stdout; 416 } 417} 418 419static void 420print_word(FILE *f) 421{ 422 423 if (f != NULL) { 424 if (vflag && affix[0] != '\0' && affix[0] != '.') 425 fprintf(f, "%s\t%s\n", affix, original); 426 else 427 fprintf(f, "%s\n", original); 428 } 429} 430 431/* 432 * For each matching suffix in suftab, call the function associated 433 * with that suffix (p1 and p2). 434 */ 435static int 436suffix(char *ep, size_t lev) 437{ 438 const struct suftab *t; 439 char *cp; 440 const char *sp; 441 442 lev += DLEV; 443 getderiv(lev + 1); 444 deriv.buf[lev] = deriv.buf[lev - 1] = 0; 445 for (t = suftab; (sp = t->suf) != NULL; t++) { 446 cp = ep; 447 while (*sp) { 448 if (*--cp != *sp++) 449 goto next; 450 } 451 for (sp = cp; --sp >= word && !vowel(*sp);) 452 ; /* nothing */ 453 if (sp < word) 454 return 0; 455 if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1)) 456 return 1; 457 if (t->p2 != NULL) { 458 deriv.buf[lev] = deriv.buf[lev + 1] = NULL; 459 return (*t->p2)(ep - t->n2, t->d2, t->a2, lev); 460 } 461 return 0; 462next: ; 463 } 464 return 0; 465} 466 467static int 468/*ARGSUSED*/ 469nop(char *ep, const char *d, const char *a, size_t lev) 470{ 471 472 return 0; 473} 474 475static int 476/*ARGSUSED*/ 477strip(char *ep, const char *d, const char *a, size_t lev) 478{ 479 480 return trypref(ep, a, lev) || suffix(ep, lev); 481} 482 483static int 484s(char *ep, const char *d, const char *a, const size_t lev) 485{ 486 487 if (lev > DLEV + 1) 488 return 0; 489 if (*ep == 's' && ep[-1] == 's') 490 return 0; 491 return strip(ep, d, a, lev); 492} 493 494static int 495/*ARGSUSED*/ 496an(char *ep, const char *d, const char *a, size_t lev) 497{ 498 499 if (!isupper((unsigned char)*word)) /* must be proper name */ 500 return 0; 501 return trypref(ep, a, lev); 502} 503 504static int 505/*ARGSUSED*/ 506ize(char *ep, const char *d, const char *a, size_t lev) 507{ 508 509 *ep++ = 'e'; 510 
return strip(ep ,"", d, lev); 511} 512 513static int 514/*ARGSUSED*/ 515y_to_e(char *ep, const char *d, const char *a, size_t lev) 516{ 517 char c = *ep; 518 519 *ep++ = 'e'; 520 if (strip(ep, "", d, lev)) 521 return 1; 522 ep[-1] = c; 523 return 0; 524} 525 526static int 527ily(char *ep, const char *d, const char *a, size_t lev) 528{ 529 530 if (ep[-1] == 'i') 531 return i_to_y(ep, d, a, lev); 532 else 533 return strip(ep, d, a, lev); 534} 535 536static int 537ncy(char *ep, const char *d, const char *a, size_t lev) 538{ 539 540 if (skipv(skipv(ep - 1)) < word) 541 return 0; 542 ep[-1] = 't'; 543 return strip(ep, d, a, lev); 544} 545 546static int 547bility(char *ep, const char *d, const char *a, size_t lev) 548{ 549 550 *ep++ = 'l'; 551 return y_to_e(ep, d, a, lev); 552} 553 554static int 555i_to_y(char *ep, const char *d, const char *a, size_t lev) 556{ 557 558 if (ep[-1] == 'i') { 559 ep[-1] = 'y'; 560 a = d; 561 } 562 return strip(ep, "", a, lev); 563} 564 565static int 566es(char *ep, const char *d, const char *a, size_t lev) 567{ 568 569 if (lev > DLEV) 570 return 0; 571 572 switch (ep[-1]) { 573 default: 574 return 0; 575 case 'i': 576 return i_to_y(ep, d, a, lev); 577 case 's': 578 case 'h': 579 case 'z': 580 case 'x': 581 return strip(ep, d, a, lev); 582 } 583} 584 585static int 586metry(char *ep, const char *d, const char *a, size_t lev) 587{ 588 589 ep[-2] = 'e'; 590 ep[-1] = 'r'; 591 return strip(ep, d, a, lev); 592} 593 594static int 595tion(char *ep, const char *d, const char *a, size_t lev) 596{ 597 598 switch (ep[-2]) { 599 case 'c': 600 case 'r': 601 return trypref(ep, a, lev); 602 case 'a': 603 return y_to_e(ep, d, a, lev); 604 } 605 return 0; 606} 607 608/* 609 * Possible consonant-consonant-e ending. 
610 */ 611static int 612CCe(char *ep, const char *d, const char *a, size_t lev) 613{ 614 615 switch (ep[-1]) { 616 case 'l': 617 if (vowel(ep[-2])) 618 break; 619 switch (ep[-2]) { 620 case 'l': 621 case 'r': 622 case 'w': 623 break; 624 default: 625 return y_to_e(ep, d, a, lev); 626 } 627 break; 628 case 's': 629 if (ep[-2] == 's') 630 break; 631 /*FALLTHROUGH*/ 632 case 'c': 633 case 'g': 634 if (*ep == 'a') 635 return 0; 636 /*FALLTHROUGH*/ 637 case 'v': 638 case 'z': 639 if (vowel(ep[-2])) 640 break; 641 /*FALLTHROUGH*/ 642 case 'u': 643 if (y_to_e(ep, d, a, lev)) 644 return 1; 645 if (!(ep[-2] == 'n' && ep[-1] == 'g')) 646 return 0; 647 } 648 return VCe(ep, d, a, lev); 649} 650 651/* 652 * Possible consonant-vowel-consonant-e ending. 653 */ 654static int 655VCe(char *ep, const char *d, const char *a, size_t lev) 656{ 657 char c; 658 659 c = ep[-1]; 660 if (c == 'e') 661 return 0; 662 if (!vowel(c) && vowel(ep[-2])) { 663 c = *ep; 664 *ep++ = 'e'; 665 if (trypref(ep, d, lev) || suffix(ep, lev)) 666 return 1; 667 ep--; 668 *ep = c; 669 } 670 return strip(ep, d, a, lev); 671} 672 673static const char * 674lookuppref(char **wp, char *ep) 675{ 676 const char **sp, *cp; 677 char *bp; 678 679 for (sp = preftab; *sp; sp++) { 680 bp = *wp; 681 for (cp = *sp; *cp; cp++, bp++) { 682 if (tolower((unsigned char)*bp) != *cp) 683 goto next; 684 } 685 for (cp = bp; cp < ep; cp++) { 686 if (vowel(*cp)) { 687 *wp = bp; 688 return *sp; 689 } 690 } 691next: ; 692 } 693 return 0; 694} 695 696/* 697 * If the word is not in the dictionary, try stripping off prefixes 698 * until the word is found or we run out of prefixes to check. 
699 */ 700static int 701trypref(char *ep, const char *a, size_t lev) 702{ 703 const char *cp; 704 char *bp; 705 char *pp; 706 int val = 0; 707 char space[20]; 708 709 getderiv(lev + 2); 710 deriv.buf[lev] = a; 711 if (tryword(word, ep, lev)) 712 return 1; 713 bp = word; 714 pp = space; 715 deriv.buf[lev + 1] = pp; 716 while ((cp = lookuppref(&bp, ep)) != NULL) { 717 *pp++ = '+'; 718 while ((*pp = *cp++)) 719 pp++; 720 if (tryword(bp, ep, lev + 1)) { 721 val = 1; 722 break; 723 } 724 if (pp - space >= sizeof(space)) 725 return 0; 726 } 727 deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL; 728 return val; 729} 730 731static int 732tryword(char *bp, char *ep, size_t lev) 733{ 734 size_t i, j; 735 char duple[3]; 736 737 if (ep-bp <= 1) 738 return 0; 739 if (vowel(*ep) && monosyl(bp, ep)) 740 return 0; 741 742 i = dict(bp, ep); 743 if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && 744 monosyl(bp, ep - 1)) { 745 ep--; 746 getderiv(++lev); 747 deriv.buf[lev] = duple; 748 duple[0] = '+'; 749 duple[1] = *ep; 750 duple[2] = '\0'; 751 i = dict(bp, ep); 752 } 753 if (vflag == 0 || i == 0) 754 return i; 755 756 /* Also tack on possible derivations. (XXX - warn on truncation?) */ 757 for (j = lev; j > 0; j--) { 758 if (deriv.buf[j]) 759 (void)strlcat(affix, deriv.buf[j], sizeof(affix)); 760 } 761 return i; 762} 763 764static int 765monosyl(char *bp, char *ep) 766{ 767 768 if (ep < bp + 2) 769 return 0; 770 if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 771 return 0; 772 while (--ep >= bp) 773 if (vowel(*ep)) 774 return 0; 775 return 1; 776} 777 778static char * 779skipv(char *st) 780{ 781 782 if (st >= word && vowel(*st)) 783 st--; 784 while (st >= word && !vowel(*st)) 785 st--; 786 return st; 787} 788 789static int 790vowel(int c) 791{ 792 793 switch (tolower(c)) { 794 case 'a': 795 case 'e': 796 case 'i': 797 case 'o': 798 case 'u': 799 case 'y': 800 return 1; 801 } 802 return 0; 803} 804 805/* 806 * Crummy way to Britishise. 
807 */ 808static void 809ise(void) 810{ 811 struct suftab *tab; 812 char *cp; 813 814 for (tab = suftab; tab->suf; tab++) { 815 /* Assume that suffix will contain 'z' if a1 or d1 do */ 816 if (strchr(tab->suf, 'z')) { 817 tab->suf = cp = estrdup(tab->suf); 818 ztos(cp); 819 if (strchr(tab->d1, 'z')) { 820 tab->d1 = cp = estrdup(tab->d1); 821 ztos(cp); 822 } 823 if (strchr(tab->a1, 'z')) { 824 tab->a1 = cp = estrdup(tab->a1); 825 ztos(cp); 826 } 827 } 828 } 829} 830 831static void 832ztos(char *st) 833{ 834 835 for (; *st; st++) 836 if (*st == 'z') 837 *st = 's'; 838} 839 840/* 841 * Look up a word in the dictionary. 842 * Returns 1 if found, 0 if not. 843 */ 844static int 845dict(char *bp, char *ep) 846{ 847 char c; 848 int i, rval; 849 850 c = *ep; 851 *ep = '\0'; 852 if (xflag) 853 printf("=%s\n", bp); 854 for (i = rval = 0; wlists[i].fd != -1; i++) { 855 if ((rval = look((unsigned char *)bp, wlists[i].front, 856 wlists[i].back)) == 1) 857 break; 858 } 859 *ep = c; 860 return rval; 861} 862 863static void 864getderiv(size_t lev) 865{ 866 if (deriv.maxlev < lev) { 867 if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0) 868 err(1, "Cannot grow array"); 869 deriv.maxlev = lev; 870 } 871} 872 873 874static void 875usage(void) 876{ 877 (void)fprintf(stderr, 878 "Usage: %s [-bvx] [-o found-words] word-list ...\n", 879 getprogname()); 880 exit(1); 881} 882