/* locate.bigram.c revision 50477 */
/*
 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * James A. Woods.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/usr.bin/locate/bigram/locate.bigram.c 50477 1999-08-28 01:08:13Z peter $
 */

#ifndef lint
/*
 * Copyright notice in the traditional "@(#)" identification-string form.
 * Nothing in this file reads it; it is never modified, hence const.
 */
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/*
 * SCCS-style version identification string for this source file.
 * Nothing in this file reads it; it is never modified, hence const.
 */
static const char sccsid[] = "@(#)locate.bigram.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */

/*
 *  bigram < sorted_file_names | sort -nr |
 *  	awk 'NR <= 128 { printf $2 }' > bigrams
 *
 * List bigrams for the 'updatedb' script.
 * Use 'code' to encode a file using this output.
 */

#include <sys/param.h>			/* for MAXPATHLEN */

#include <limits.h>			/* for UCHAR_MAX */
#include <stdio.h>

#include "locate.h"

62193323Sedu_char buf1[MAXPATHLEN] = " ";
63193323Sedu_char buf2[MAXPATHLEN];
64193323Sedu_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];
65193323Sed
66193323Sedint
67193323Sedmain(void)
68193323Sed{
69193323Sed  	register u_char *cp;
70193323Sed	register u_char *oldpath = buf1, *path = buf2;
71193323Sed	register u_int i, j;
72193323Sed
73193323Sed     	while (fgets(path, sizeof(buf2), stdin) != NULL) {
74193323Sed
75193323Sed		/*
76193323Sed		 * We don't need remove newline character '\n'.
77193323Sed		 * '\n' is less than ASCII_MIN and will be later
78193323Sed		 * ignored at output.
79193323Sed		 */
80193323Sed
81193323Sed
82193323Sed		/* skip longest common prefix */
83193323Sed		for (cp = path; *cp == *oldpath; cp++, oldpath++)
84193323Sed			if (*cp == '\0')
85193323Sed				break;
86193323Sed
87193323Sed		while (*cp != '\0' && *(cp + 1) != '\0') {
88193323Sed			bigram[(u_char)*cp][(u_char)*(cp + 1)]++;
89198090Srdivacky			cp += 2;
90193323Sed		}
91193323Sed
92193323Sed		/* swap pointers */
93193323Sed		if (path == buf1) {
94193323Sed			path = buf2;
95193323Sed			oldpath = buf1;
96193323Sed		} else {
97193323Sed			path = buf1;
98193323Sed			oldpath = buf2;
99193323Sed		}
100193323Sed   	}
101193323Sed
102193323Sed	/* output, boundary check */
103193323Sed	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
104193323Sed		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
105193323Sed			if (bigram[i][j] != 0)
106193323Sed				(void)printf("%4u %c%c\n", bigram[i][j], i, j);
107198090Srdivacky
108193323Sed	exit(0);
109193323Sed}
110193323Sed