1/* $NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $ */ 2 3/*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Barry Brachman. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36#ifndef lint 37#if 0 38static char sccsid[] = "@(#)word.c 8.1 (Berkeley) 6/11/93"; 39#else 40__RCSID("$NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $"); 41#endif 42#endif /* not lint */ 43 44#include <sys/types.h> 45#include <sys/stat.h> 46 47#include <err.h> 48#include <stdio.h> 49#include <stdlib.h> 50#include <string.h> 51 52#include "bog.h" 53#include "extern.h" 54 55static char *dictspace, *dictend; 56static char *sp; 57 58static int first = 1, lastch = 0; 59 60extern struct dictindex dictindex[]; 61extern int wordlen; 62 63/* 64 * Return the next word in the compressed dictionary in 'buffer' or 65 * NULL on end-of-file 66 */ 67char * 68nextword(FILE *fp) 69{ 70 int ch, pcount; 71 char *p; 72 static char buf[MAXWORDLEN + 1]; 73 74 if (fp == NULL) { 75 if (sp == dictend) 76 return (NULL); 77 78 p = buf + (int) *sp++; 79 80 /* 81 * The dictionary ends with a null byte 82 */ 83 while (*sp >= 'a') 84 if ((*p++ = *sp++) == 'q') 85 *p++ = 'u'; 86 } else { 87 if (first) { 88 if ((pcount = getc(fp)) == EOF) 89 return (NULL); 90 first = 0; 91 } else if ((pcount = lastch) == EOF) 92 return (NULL); 93 94 p = buf + pcount; 95 96 while ((ch = getc(fp)) != EOF && ch >= 'a') 97 if ((*p++ = ch) == 'q') 98 *p++ = 'u'; 99 lastch = ch; 100 } 101 wordlen = (int) (p - buf); 102 *p = '\0'; 103 return (buf); 104} 105 106/* 107 * Reset the state of nextword() and do the fseek() 108 */ 109long 110dictseek(FILE *fp, long offset, int ptrname) 111{ 112 if (fp == NULL) { 113 if ((sp = dictspace + offset) >= dictend) 114 return (-1); 115 return (0); 116 } 117 118 first = 1; 119 return (fseek(fp, offset, ptrname)); 120} 121 122FILE * 123opendict(const char *dict) 124{ 125 FILE *fp; 126 127 if ((fp = fopen(dict, "r")) == NULL) 128 return (NULL); 129 return (fp); 130} 131 132/* 133 * Load the given dictionary and initialize the pointers 134 */ 135int 136loaddict(FILE *fp) 137{ 138 struct stat statb; 139 long n; 140 int st; 141 char *p; 142 143 if (fstat(fileno(fp), &statb) < 0) { 144 (void)fclose(fp); 145 return (-1); 146 } 147 148 /* 149 * An extra character (a sentinel) is allocated and set to null 150 * to improve the expansion loop in nextword(). 151 */ 152 if ((dictspace = malloc(statb.st_size + 1)) == NULL) { 153 (void)fclose(fp); 154 return (-1); 155 } 156 n = (long)statb.st_size; 157 sp = dictspace; 158 dictend = dictspace + n; 159 160 p = dictspace; 161 st = -1; 162 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) { 163 p += st; 164 n -= st; 165 } 166 if (st < 0) { 167 (void)fclose(fp); 168 warnx("Error reading dictionary"); 169 return (-1); 170 } 171 *p = '\0'; 172 return (0); 173} 174 175/* 176 * Dependent on the exact format of the index file: 177 * Starting offset field begins in column 1 and length field in column 9 178 * Taking the easy way out, the input buffer is made "large" and a check 179 * is made for lines that are too long 180 */ 181int 182loadindex(const char *indexfile) 183{ 184 int i, j; 185 char buf[BUFSIZ]; 186 FILE *fp; 187 188 if ((fp = fopen(indexfile, "r")) == NULL) { 189 warn("Can't open '%s'", indexfile); 190 return (-1); 191 } 192 i = 0; 193 while (fgets(buf, sizeof(buf), fp) != NULL) { 194 if (strchr(buf, '\n') == NULL) { 195 warnx("A line in the index file is too long"); 196 (void) fclose(fp); 197 return(-1); 198 } 199 j = *buf - 'a'; 200 if (i != j) { 201 warnx("Bad index order"); 202 (void) fclose(fp); 203 return(-1); 204 } 205 dictindex[j].start = atol(buf + 1); 206 dictindex[j].length = atol(buf + 9) - dictindex[j].start; 207 i++; 208 } 209 (void) fclose(fp); 210 if (i != 26) { 211 warnx("Bad index length"); 212 return(-1); 213 } 214 return(0); 215} 216