/* $OpenBSD: word.c,v 1.8 2016/01/10 13:18:07 mestre Exp $ */
/* $NetBSD: word.c,v 1.2 1995/03/21 12:14:45 cgd Exp $ */

/*-
 * Copyright (c) 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Barry Brachman.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 */ 35 36#include <sys/stat.h> 37 38#include <err.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <string.h> 42 43#include "bog.h" 44 45static char *dictspace, *dictend; 46static char *sp; 47 48static int first = 1, lastch = 0; 49 50/* 51 * Return the next word in the compressed dictionary in 'buffer' or 52 * NULL on end-of-file 53 */ 54char * 55nextword(FILE *fp) 56{ 57 extern int wordlen; 58 int ch, pcount; 59 char *p; 60 static char buf[MAXWORDLEN + 1]; 61 62 if (fp == NULL) { 63 if (sp == dictend) 64 return (NULL); 65 66 p = buf + (int) *sp++; 67 68 /* 69 * The dictionary ends with a null byte 70 */ 71 while (*sp >= 'a') 72 if ((*p++ = *sp++) == 'q') 73 *p++ = 'u'; 74 } else { 75 if (first) { 76 if ((pcount = getc(fp)) == EOF) 77 return (NULL); 78 first = 0; 79 } else if ((pcount = lastch) == EOF) 80 return (NULL); 81 82 p = buf + pcount; 83 84 while ((ch = getc(fp)) != EOF && ch >= 'a') 85 if ((*p++ = ch) == 'q') 86 *p++ = 'u'; 87 lastch = ch; 88 } 89 wordlen = (int) (p - buf); 90 *p = '\0'; 91 return (buf); 92} 93 94/* 95 * Reset the state of nextword() and do the fseek() 96 */ 97long 98dictseek(FILE *fp, long offset, int ptrname) 99{ 100 if (fp == NULL) { 101 if ((sp = dictspace + offset) >= dictend) 102 return (-1); 103 return (0); 104 } 105 106 first = 1; 107 return (fseek(fp, offset, ptrname)); 108} 109 110FILE * 111opendict(char *dict) 112{ 113 FILE *fp; 114 115 if ((fp = fopen(dict, "r")) == NULL) 116 return (NULL); 117 return (fp); 118} 119 120/* 121 * Load the given dictionary and initialize the pointers 122 */ 123int 124loaddict(FILE *fp) 125{ 126 struct stat statb; 127 long n; 128 int st; 129 char *p; 130 131 if (fstat(fileno(fp), &statb) < 0) { 132 (void)fclose(fp); 133 return (-1); 134 } 135 136 /* 137 * An extra character (a sentinel) is allocated and set to null 138 * to improve the expansion loop in nextword(). 
139 */ 140 if ((dictspace = malloc(statb.st_size + 1)) == NULL) { 141 (void)fclose(fp); 142 return (-1); 143 } 144 n = (long)statb.st_size; 145 sp = dictspace; 146 dictend = dictspace + n; 147 148 p = dictspace; 149 st = -1; 150 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) { 151 p += st; 152 n -= st; 153 } 154 if (st < 0) { 155 (void)fclose(fp); 156 warnx("Error reading dictionary"); 157 return (-1); 158 } 159 *p = '\0'; 160 return (0); 161} 162 163/* 164 * Dependent on the exact format of the index file: 165 * Starting offset field begins in column 1 and length field in column 9 166 * Taking the easy way out, the input buffer is made "large" and a check 167 * is made for lines that are too long 168 */ 169int 170loadindex(char *indexfile) 171{ 172 int i, j; 173 char buf[BUFSIZ]; 174 FILE *fp; 175 extern struct dictindex dictindex[]; 176 177 if ((fp = fopen(indexfile, "r")) == NULL) { 178 warnx("Can't open '%s'", indexfile); 179 return (-1); 180 } 181 i = 0; 182 while (fgets(buf, sizeof(buf), fp) != NULL) { 183 if (strchr(buf, '\n') == NULL) { 184 warnx("A line in the index file is too long"); 185 fclose(fp); 186 return(-1); 187 } 188 j = *buf - 'a'; 189 if (i != j) { 190 warnx("Bad index order"); 191 fclose(fp); 192 return(-1); 193 } 194 dictindex[j].start = atol(buf + 1); 195 dictindex[j].length = atol(buf + 9) - dictindex[j].start; 196 i++; 197 } 198 if (i != 26) { 199 warnx("Bad index length"); 200 fclose(fp); 201 return(-1); 202 } 203 (void) fclose(fp); 204 return(0); 205} 206