1/* $NetBSD: mkdict.c,v 1.12 2021/05/02 12:50:44 rillig Exp $ */
2
3/*-
4 * Copyright (c) 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Barry Brachman.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36static const char copyright[] =
37    "@(#) Copyright (c) 1993\n\
38	The Regents of the University of California.  All rights reserved.\n";
39#if 0
40static char sccsid[] = "@(#)mkdict.c	8.1 (Berkeley) 6/11/93";
41#else
42static const char rcsid[] =
43    "$NetBSD: mkdict.c,v 1.12 2021/05/02 12:50:44 rillig Exp $";
44#endif
45#endif /* not lint */
46
47/*
48 * Filter out words that:
49 *	1) Are not completely made up of lower case letters
50 *	2) Contain a 'q' not immediately followed by a 'u'
 *	3) Are fewer than 3 characters long
52 *	4) Are greater than MAXWORDLEN characters long
53 */
54
55#include <ctype.h>
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59
60#include "bog.h"
61
62int
63main(int argc, char *argv[])
64{
65	char *p, *q;
66	int ch, common, nwords;
67	int current, len, prev, qcount;
68	char buf[2][MAXWORDLEN + 1];
69
70	prev = 0;
71	current = 1;
72	buf[prev][0] = '\0';
73
74	for (nwords = 1;
75	    fgets(buf[current], MAXWORDLEN + 1, stdin) != NULL; ++nwords) {
76		if ((p = strchr(buf[current], '\n')) == NULL) {
77			fprintf(stderr, "word too long: %s\n", buf[current]);
78			while ((ch = getc(stdin)) != EOF && ch != '\n')
79				;
80			if (ch == EOF)
81				break;
82			continue;
83		}
84		len = 0;
85		for (p = buf[current]; *p != '\n'; p++) {
86			if (!islower((unsigned char)*p))
87				break;
88			if (*p == 'q') {
89				q = p + 1;
90				if (*q != 'u')
91					break;
92				else {
93					while ((*q = *(q + 1)))
94						q++;
95				}
96				len++;
97			}
98			len++;
99		}
100		if (*p != '\n' || len < 3 || len > MAXWORDLEN)
101			continue;
102		if (argc == 2 && nwords % atoi(argv[1]))
103			continue;
104
105		*p = '\0';
106		p = buf[current];
107		q = buf[prev];
108		qcount = 0;
109		while ((ch = *p++) == *q++ && ch != '\0')
110			if (ch == 'q')
111				qcount++;
112		common = p - buf[current] - 1;
113		printf("%c%s", common + qcount, p - 1);
114		prev = !prev;
115		current = !current;
116	}
117	fprintf(stderr, "%d words\n", nwords);
118	fflush(stdout);
119	if (ferror(stdout)) {
120		perror("error writing standard output");
121		exit(1);
122	}
123	exit(0);
124}
125