/* locate.bigram.c revision 17972 */
1139823Simp/* 254263Sshin * Copyright (c) 1989, 1993 354263Sshin * The Regents of the University of California. All rights reserved. 454263Sshin * 554263Sshin * This code is derived from software contributed to Berkeley by 654263Sshin * James A. Woods. 754263Sshin * 854263Sshin * Redistribution and use in source and binary forms, with or without 954263Sshin * modification, are permitted provided that the following conditions 1054263Sshin * are met: 1154263Sshin * 1. Redistributions of source code must retain the above copyright 1254263Sshin * notice, this list of conditions and the following disclaimer. 1354263Sshin * 2. Redistributions in binary form must reproduce the above copyright 1454263Sshin * notice, this list of conditions and the following disclaimer in the 1554263Sshin * documentation and/or other materials provided with the distribution. 1654263Sshin * 3. All advertising materials mentioning features or use of this software 1754263Sshin * must display the following acknowledgement: 1854263Sshin * This product includes software developed by the University of 1954263Sshin * California, Berkeley and its contributors. 2054263Sshin * 4. Neither the name of the University nor the names of its contributors 2154263Sshin * may be used to endorse or promote products derived from this software 2254263Sshin * without specific prior written permission. 2354263Sshin * 2454263Sshin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2554263Sshin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2654263Sshin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2754263Sshin * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28273087Sae * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29273087Sae * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3054263Sshin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3154263Sshin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32273087Sae * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33273087Sae * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34273087Sae * SUCH DAMAGE. 3554263Sshin * 3654263Sshin * $Id: locate.bigram.c,v 1.5 1996/08/30 03:06:15 peter Exp $ 3754263Sshin */ 3854263Sshin 3954263Sshin#ifndef lint 40219206Sbzstatic char copyright[] = 4154263Sshin"@(#) Copyright (c) 1989, 1993\n\ 42273087Sae The Regents of the University of California. All rights reserved.\n"; 4354263Sshin#endif /* not lint */ 4454263Sshin 45129880Sphk#ifndef lint 46273087Saestatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 4754263Sshin#endif /* not lint */ 4854263Sshin 49273087Sae/* 5054263Sshin * bigram < sorted_file_names | sort -nr | 5154263Sshin * awk 'NR <= 128 { printf $2 }' > bigrams 5291270Sbrooks * 5354263Sshin * List bigrams for 'updatedb' script. 54193664Shrs * Use 'code' to encode a file using this output. 
55178888Sjulian */ 5662587Sitojun 5779106Sbrooks#include <stdio.h> 5854263Sshin#include <sys/param.h> /* for MAXPATHLEN */ 5954263Sshin#include "locate.h" 6054263Sshin 61257176Sglebiusu_char buf1[MAXPATHLEN] = " "; 62130933Sbrooksu_char buf2[MAXPATHLEN]; 6354263Sshinu_int bigram[UCHAR_MAX][UCHAR_MAX]; 6454263Sshin 6554263Sshinint 6654263Sshinmain(void) 67196019Srwatson{ 6854263Sshin register u_char *cp; 6954263Sshin register u_char *oldpath = buf1, *path = buf2; 7054263Sshin register u_int i, j; 7178064Sume 72273087Sae while (fgets(path, sizeof(buf2), stdin) != NULL) { 7378064Sume 7454263Sshin /* skip empty lines */ 7579106Sbrooks if (*path == '\n') 7654263Sshin continue; 7754263Sshin 7854263Sshin /* Squelch characters that would botch the decoding. */ 7954263Sshin for (cp = path; *cp != '\0'; cp++) { 8054263Sshin /* chop newline */ 8154263Sshin if (*cp == '\n') 8254263Sshin *cp = '\0'; 8354263Sshin /* range */ 84273087Sae else if (*cp < ASCII_MIN || *cp > ASCII_MAX) 8554263Sshin *cp = '?'; 86148385Sume } 8762587Sitojun 8854263Sshin /* skip longest common prefix */ 8954263Sshin for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++); 9062587Sitojun 91153621Sthompsa while (*cp != '\0' && *(cp+1) != '\0') { 92153621Sthompsa bigram[*cp][*(cp+1)]++; 9354263Sshin cp += 2; 9454263Sshin } 95163606Srwatson 96163606Srwatson /* swap pointers */ 97241610Sglebius if (path == buf1) { 9862587Sitojun path = buf2; 99127305Srwatson oldpath = buf1; 100271917Shrs } else { 101127305Srwatson path = buf1; 102271917Shrs oldpath = buf2; 103271917Shrs } 10479106Sbrooks } 105215701Sdim 106195727Srwatson /* output, (paranoid) boundary check */ 107273087Sae for (i = ASCII_MIN; i <= ASCII_MAX; i++) 108273087Sae for (j = ASCII_MIN; j <= ASCII_MAX; j++) 109195699Srwatson if (bigram[i][j] != 0) 110271917Shrs printf("%4u %c%c\n", bigram[i][j], i, j); 111271917Shrs 112271917Shrs exit(0); 113271917Shrs} 114271917Shrs