11590Srgrimes/*
218905Swosch * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
31590Srgrimes * Copyright (c) 1989, 1993
41590Srgrimes *	The Regents of the University of California.  All rights reserved.
51590Srgrimes *
61590Srgrimes * This code is derived from software contributed to Berkeley by
71590Srgrimes * James A. Woods.
81590Srgrimes *
91590Srgrimes * Redistribution and use in source and binary forms, with or without
101590Srgrimes * modification, are permitted provided that the following conditions
111590Srgrimes * are met:
121590Srgrimes * 1. Redistributions of source code must retain the above copyright
131590Srgrimes *    notice, this list of conditions and the following disclaimer.
141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151590Srgrimes *    notice, this list of conditions and the following disclaimer in the
161590Srgrimes *    documentation and/or other materials provided with the distribution.
171590Srgrimes * 3. All advertising materials mentioning features or use of this software
181590Srgrimes *    must display the following acknowledgement:
191590Srgrimes *	This product includes software developed by the University of
201590Srgrimes *	California, Berkeley and its contributors.
211590Srgrimes * 4. Neither the name of the University nor the names of its contributors
221590Srgrimes *    may be used to endorse or promote products derived from this software
231590Srgrimes *    without specific prior written permission.
241590Srgrimes *
251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
281590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
351590Srgrimes * SUCH DAMAGE.
3617776Swosch *
3750477Speter * $FreeBSD$
381590Srgrimes */
391590Srgrimes
40209571Sgavin#if 0
411590Srgrimes#ifndef lint
421590Srgrimesstatic char copyright[] =
431590Srgrimes"@(#) Copyright (c) 1989, 1993\n\
441590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
451590Srgrimes#endif /* not lint */
461590Srgrimes
471590Srgrimes#ifndef lint
481590Srgrimesstatic char sccsid[] = "@(#)locate.bigram.c	8.1 (Berkeley) 6/6/93";
491590Srgrimes#endif /* not lint */
50209571Sgavin#endif
511590Srgrimes
521590Srgrimes/*
5317776Swosch *  bigram < sorted_file_names | sort -nr |
5417776Swosch *  	awk 'NR <= 128 { printf $2 }' > bigrams
558874Srgrimes *
561590Srgrimes * List bigrams for 'updatedb' script.
571590Srgrimes * Use 'code' to encode a file using this output.
581590Srgrimes */
591590Srgrimes
601590Srgrimes#include <stdio.h>
61141563Sstefanf#include <stdlib.h>
621590Srgrimes#include <sys/param.h>			/* for MAXPATHLEN */
6317592Swosch#include "locate.h"
641590Srgrimes
6517592Swoschu_char buf1[MAXPATHLEN] = " ";
6617592Swoschu_char buf2[MAXPATHLEN];
6718905Swoschu_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];
681590Srgrimes
6917776Swoschint
7017776Swoschmain(void)
711590Srgrimes{
72209571Sgavin	u_char *cp;
73209571Sgavin	u_char *oldpath = buf1, *path = buf2;
74209571Sgavin	u_int i, j;
751590Srgrimes
7617776Swosch     	while (fgets(path, sizeof(buf2), stdin) != NULL) {
7717592Swosch
7818300Swosch		/*
7918300Swosch		 * We don't need remove newline character '\n'.
8018300Swosch		 * '\n' is less than ASCII_MIN and will be later
8118300Swosch		 * ignored at output.
8218300Swosch		 */
8317592Swosch
8417592Swosch
851590Srgrimes		/* skip longest common prefix */
8618300Swosch		for (cp = path; *cp == *oldpath; cp++, oldpath++)
8718300Swosch			if (*cp == '\0')
8818300Swosch				break;
8917592Swosch
9018300Swosch		while (*cp != '\0' && *(cp + 1) != '\0') {
9118905Swosch			bigram[(u_char)*cp][(u_char)*(cp + 1)]++;
9217592Swosch			cp += 2;
931590Srgrimes		}
9417592Swosch
9517776Swosch		/* swap pointers */
9617776Swosch		if (path == buf1) {
9717776Swosch			path = buf2;
9817776Swosch			oldpath = buf1;
9917776Swosch		} else {
10017776Swosch			path = buf1;
10117776Swosch			oldpath = buf2;
10217776Swosch		}
1031590Srgrimes   	}
10417592Swosch
10518300Swosch	/* output, boundary check */
10617592Swosch	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
10717592Swosch		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
10817592Swosch			if (bigram[i][j] != 0)
10918300Swosch				(void)printf("%4u %c%c\n", bigram[i][j], i, j);
11017776Swosch
11117776Swosch	exit(0);
1121590Srgrimes}
113