locate.bigram.c revision 18300
1207753Smm/*
2207753Smm * Copyright (c) 1989, 1993
3207753Smm *	The Regents of the University of California.  All rights reserved.
4207753Smm *
5207753Smm * This code is derived from software contributed to Berkeley by
6207753Smm * James A. Woods.
7207753Smm *
8207753Smm * Redistribution and use in source and binary forms, with or without
9207753Smm * modification, are permitted provided that the following conditions
10207753Smm * are met:
11207753Smm * 1. Redistributions of source code must retain the above copyright
12207753Smm *    notice, this list of conditions and the following disclaimer.
13207753Smm * 2. Redistributions in binary form must reproduce the above copyright
14207753Smm *    notice, this list of conditions and the following disclaimer in the
15207753Smm *    documentation and/or other materials provided with the distribution.
16207753Smm * 3. All advertising materials mentioning features or use of this software
17207753Smm *    must display the following acknowledgement:
18207753Smm *	This product includes software developed by the University of
19207753Smm *	California, Berkeley and its contributors.
20207753Smm * 4. Neither the name of the University nor the names of its contributors
21207753Smm *    may be used to endorse or promote products derived from this software
22207753Smm *    without specific prior written permission.
23207753Smm *
24207753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25207753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26207753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27207753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28207753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29207753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30207753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31207753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32207753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33278433Srpaulo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34207753Smm * SUCH DAMAGE.
35207753Smm *
36207753Smm * 	$Id: locate.bigram.c,v 1.1 1996/09/13 13:23:48 wosch Exp wosch $
37207753Smm */
38207753Smm
#ifndef lint
/*
 * Identification strings compiled into the binary unless `lint' is
 * defined.  They are not read by main(); the "@(#)" prefix is the
 * marker that what(1) searches for.
 */
static char copyright[] =
	"@(#) Copyright (c) 1989, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";
static char sccsid[] = "@(#)locate.bigram.c\t8.1 (Berkeley) 6/6/93";
#endif /* not lint */
48207753Smm
49207753Smm/*
50207753Smm *  bigram < sorted_file_names | sort -nr |
51207753Smm *  	awk 'NR <= 128 { printf $2 }' > bigrams
52207753Smm *
53207753Smm * List bigrams for 'updatedb' script.
54207753Smm * Use 'code' to encode a file using this output.
55207753Smm */
56207753Smm
57262754Sdelphij#include <stdio.h>
58207753Smm#include <sys/param.h>			/* for MAXPATHLEN */
59207753Smm#include "locate.h"
60207753Smm
/*
 * Line buffers used alternately by main(): one receives the name being
 * read while the other still holds the previous name for the common
 * prefix comparison.  buf1 starts out as " " so that the very first
 * name has something to be compared against.
 */
u_char buf1[MAXPATHLEN] = " ";
u_char buf2[MAXPATHLEN];

/*
 * bigram[a][b] is the number of times byte `a' was immediately followed
 * by byte `b'.  The indices are raw u_char values, i.e. anything from 0
 * to UCHAR_MAX inclusive, so each dimension needs UCHAR_MAX + 1 slots;
 * with only UCHAR_MAX slots a 0xff byte would index past the end.
 */
u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];
64207753Smm
65207753Smmint
66207753Smmmain(void)
67207753Smm{
68207753Smm  	register u_char *cp;
69207753Smm	register u_char *oldpath = buf1, *path = buf2;
70207753Smm	register u_int i, j;
71207753Smm
72207753Smm     	while (fgets(path, sizeof(buf2), stdin) != NULL) {
73207753Smm
74207753Smm		/*
75207753Smm		 * We don't need remove newline character '\n'.
76207753Smm		 * '\n' is less than ASCII_MIN and will be later
77207753Smm		 * ignored at output.
78207753Smm		 */
79207753Smm
80207753Smm
81207753Smm		/* skip longest common prefix */
82207753Smm		for (cp = path; *cp == *oldpath; cp++, oldpath++)
83207753Smm			if (*cp == '\0')
84207753Smm				break;
85207753Smm
86207753Smm		while (*cp != '\0' && *(cp + 1) != '\0') {
87207753Smm			bigram[(u_int)*cp][(u_int)*(cp + 1)]++;
88207753Smm			cp += 2;
89207753Smm		}
90207753Smm
91207753Smm		/* swap pointers */
92207753Smm		if (path == buf1) {
93207753Smm			path = buf2;
94207753Smm			oldpath = buf1;
95207753Smm		} else {
96207753Smm			path = buf1;
97207753Smm			oldpath = buf2;
98207753Smm		}
99207753Smm   	}
100207753Smm
101207753Smm	/* output, boundary check */
102207753Smm	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
103278433Srpaulo		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
104207753Smm			if (bigram[i][j] != 0)
105207753Smm				(void)printf("%4u %c%c\n", bigram[i][j], i, j);
106207753Smm
107207753Smm	exit(0);
108207753Smm}
109207753Smm