locate.bigram.c revision 17972
1139823Simp/*
254263Sshin * Copyright (c) 1989, 1993
354263Sshin *	The Regents of the University of California.  All rights reserved.
454263Sshin *
554263Sshin * This code is derived from software contributed to Berkeley by
654263Sshin * James A. Woods.
754263Sshin *
854263Sshin * Redistribution and use in source and binary forms, with or without
954263Sshin * modification, are permitted provided that the following conditions
1054263Sshin * are met:
1154263Sshin * 1. Redistributions of source code must retain the above copyright
1254263Sshin *    notice, this list of conditions and the following disclaimer.
1354263Sshin * 2. Redistributions in binary form must reproduce the above copyright
1454263Sshin *    notice, this list of conditions and the following disclaimer in the
1554263Sshin *    documentation and/or other materials provided with the distribution.
1654263Sshin * 3. All advertising materials mentioning features or use of this software
1754263Sshin *    must display the following acknowledgement:
1854263Sshin *	This product includes software developed by the University of
1954263Sshin *	California, Berkeley and its contributors.
2054263Sshin * 4. Neither the name of the University nor the names of its contributors
2154263Sshin *    may be used to endorse or promote products derived from this software
2254263Sshin *    without specific prior written permission.
2354263Sshin *
2454263Sshin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2554263Sshin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2654263Sshin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2754263Sshin * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28273087Sae * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29273087Sae * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3054263Sshin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3154263Sshin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32273087Sae * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33273087Sae * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34273087Sae * SUCH DAMAGE.
3554263Sshin *
3654263Sshin * 	$Id: locate.bigram.c,v 1.5 1996/08/30 03:06:15 peter Exp $
3754263Sshin */
3854263Sshin
3954263Sshin#ifndef lint
40219206Sbzstatic char copyright[] =
4154263Sshin"@(#) Copyright (c) 1989, 1993\n\
42273087Sae	The Regents of the University of California.  All rights reserved.\n";
4354263Sshin#endif /* not lint */
4454263Sshin
45129880Sphk#ifndef lint
46273087Saestatic char sccsid[] = "@(#)locate.bigram.c	8.1 (Berkeley) 6/6/93";
4754263Sshin#endif /* not lint */
4854263Sshin
49273087Sae/*
5054263Sshin *  bigram < sorted_file_names | sort -nr |
5154263Sshin *  	awk 'NR <= 128 { printf $2 }' > bigrams
5291270Sbrooks *
5354263Sshin * List bigrams for 'updatedb' script.
54193664Shrs * Use 'code' to encode a file using this output.
55178888Sjulian */
5662587Sitojun
5779106Sbrooks#include <stdio.h>
5854263Sshin#include <sys/param.h>			/* for MAXPATHLEN */
5954263Sshin#include "locate.h"
6054263Sshin
61257176Sglebiusu_char buf1[MAXPATHLEN] = " ";
62130933Sbrooksu_char buf2[MAXPATHLEN];
6354263Sshinu_int bigram[UCHAR_MAX][UCHAR_MAX];
6454263Sshin
6554263Sshinint
6654263Sshinmain(void)
67196019Srwatson{
6854263Sshin  	register u_char *cp;
6954263Sshin	register u_char *oldpath = buf1, *path = buf2;
7054263Sshin	register u_int i, j;
7178064Sume
72273087Sae     	while (fgets(path, sizeof(buf2), stdin) != NULL) {
7378064Sume
7454263Sshin	    	/* skip empty lines */
7579106Sbrooks		if (*path == '\n')
7654263Sshin			continue;
7754263Sshin
7854263Sshin		/* Squelch characters that would botch the decoding. */
7954263Sshin		for (cp = path; *cp != '\0'; cp++) {
8054263Sshin			/* chop newline */
8154263Sshin			if (*cp == '\n')
8254263Sshin				*cp = '\0';
8354263Sshin			/* range */
84273087Sae			else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
8554263Sshin				*cp = '?';
86148385Sume		}
8762587Sitojun
8854263Sshin		/* skip longest common prefix */
8954263Sshin		for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
9062587Sitojun
91153621Sthompsa		while (*cp != '\0' && *(cp+1) != '\0') {
92153621Sthompsa			bigram[*cp][*(cp+1)]++;
9354263Sshin			cp += 2;
9454263Sshin		}
95163606Srwatson
96163606Srwatson		/* swap pointers */
97241610Sglebius		if (path == buf1) {
9862587Sitojun			path = buf2;
99127305Srwatson			oldpath = buf1;
100271917Shrs		} else {
101127305Srwatson			path = buf1;
102271917Shrs			oldpath = buf2;
103271917Shrs		}
10479106Sbrooks   	}
105215701Sdim
106195727Srwatson	/* output, (paranoid) boundary check */
107273087Sae	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
108273087Sae		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
109195699Srwatson			if (bigram[i][j] != 0)
110271917Shrs				printf("%4u %c%c\n", bigram[i][j], i, j);
111271917Shrs
112271917Shrs	exit(0);
113271917Shrs}
114271917Shrs