1/*	$NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $	*/
2
3/*-
4 * Copyright (c) 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Barry Brachman.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36#ifndef lint
37#if 0
38static char sccsid[] = "@(#)word.c	8.1 (Berkeley) 6/11/93";
39#else
40__RCSID("$NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $");
41#endif
42#endif /* not lint */
43
44#include <sys/types.h>
45#include <sys/stat.h>
46
47#include <err.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <string.h>
51
52#include "bog.h"
53#include "extern.h"
54
55static char *dictspace, *dictend;
56static char *sp;
57
58static int first = 1, lastch = 0;
59
60extern struct dictindex dictindex[];
61extern int wordlen;
62
63/*
64 * Return the next word in the compressed dictionary in 'buffer' or
65 * NULL on end-of-file
66 */
67char *
68nextword(FILE *fp)
69{
70	int ch, pcount;
71	char *p;
72	static char buf[MAXWORDLEN + 1];
73
74	if (fp == NULL) {
75		if (sp == dictend)
76			return (NULL);
77
78		p = buf + (int) *sp++;
79
80		/*
81		 * The dictionary ends with a null byte
82		 */
83		while (*sp >= 'a')
84			if ((*p++ = *sp++) == 'q')
85				*p++ = 'u';
86	} else {
87		if (first) {
88			if ((pcount = getc(fp)) == EOF)
89				return (NULL);
90			first = 0;
91		} else if ((pcount = lastch) == EOF)
92			return (NULL);
93
94		p = buf + pcount;
95
96		while ((ch = getc(fp)) != EOF && ch >= 'a')
97			if ((*p++ = ch) == 'q')
98				*p++ = 'u';
99		lastch = ch;
100	}
101	wordlen = (int) (p - buf);
102	*p = '\0';
103	return (buf);
104}
105
106/*
107 * Reset the state of nextword() and do the fseek()
108 */
109long
110dictseek(FILE *fp, long offset, int ptrname)
111{
112	if (fp == NULL) {
113		if ((sp = dictspace + offset) >= dictend)
114			return (-1);
115		return (0);
116	}
117
118	first = 1;
119	return (fseek(fp, offset, ptrname));
120}
121
/*
 * Open the dictionary file named by 'dict' for reading.
 * Returns the open stream, or NULL if the file cannot be opened.
 */
FILE *
opendict(const char *dict)
{
	return (fopen(dict, "r"));
}
131
132/*
133 * Load the given dictionary and initialize the pointers
134 */
135int
136loaddict(FILE *fp)
137{
138	struct stat statb;
139	long n;
140	int st;
141	char *p;
142
143	if (fstat(fileno(fp), &statb) < 0) {
144		(void)fclose(fp);
145		return (-1);
146	}
147
148	/*
149	 * An extra character (a sentinel) is allocated and set to null
150	 * to improve the expansion loop in nextword().
151	 */
152	if ((dictspace = malloc(statb.st_size + 1)) == NULL) {
153		(void)fclose(fp);
154		return (-1);
155	}
156	n = (long)statb.st_size;
157	sp = dictspace;
158	dictend = dictspace + n;
159
160	p = dictspace;
161	st = -1;
162	while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) {
163		p += st;
164		n -= st;
165	}
166	if (st < 0) {
167		(void)fclose(fp);
168		warnx("Error reading dictionary");
169		return (-1);
170	}
171	*p = '\0';
172	return (0);
173}
174
175/*
176 * Dependent on the exact format of the index file:
177 * Starting offset field begins in column 1 and length field in column 9
178 * Taking the easy way out, the input buffer is made "large" and a check
179 * is made for lines that are too long
180 */
181int
182loadindex(const char *indexfile)
183{
184	int i, j;
185	char buf[BUFSIZ];
186	FILE *fp;
187
188	if ((fp = fopen(indexfile, "r")) == NULL) {
189		warn("Can't open '%s'", indexfile);
190		return (-1);
191	}
192	i = 0;
193	while (fgets(buf, sizeof(buf), fp) != NULL) {
194		if (strchr(buf, '\n') == NULL) {
195			warnx("A line in the index file is too long");
196			(void) fclose(fp);
197			return(-1);
198		}
199		j = *buf - 'a';
200		if (i != j) {
201		    warnx("Bad index order");
202		    (void) fclose(fp);
203		    return(-1);
204		}
205		dictindex[j].start = atol(buf + 1);
206		dictindex[j].length = atol(buf + 9) - dictindex[j].start;
207		i++;
208	}
209	(void) fclose(fp);
210	if (i != 26) {
211		warnx("Bad index length");
212		return(-1);
213	}
214	return(0);
215}
216