locate.bigram.c revision 17942
1100894Srwatson/* 2100894Srwatson * Copyright (c) 1989, 1993 3100894Srwatson * The Regents of the University of California. All rights reserved. 4100894Srwatson * 5100894Srwatson * This code is derived from software contributed to Berkeley by 6100894Srwatson * James A. Woods. 7100894Srwatson * 8100894Srwatson * Redistribution and use in source and binary forms, with or without 9100894Srwatson * modification, are permitted provided that the following conditions 10100894Srwatson * are met: 11100894Srwatson * 1. Redistributions of source code must retain the above copyright 12100894Srwatson * notice, this list of conditions and the following disclaimer. 13100894Srwatson * 2. Redistributions in binary form must reproduce the above copyright 14100894Srwatson * notice, this list of conditions and the following disclaimer in the 15100894Srwatson * documentation and/or other materials provided with the distribution. 16100894Srwatson * 3. All advertising materials mentioning features or use of this software 17100894Srwatson * must display the following acknowledgement: 18100894Srwatson * This product includes software developed by the University of 19100894Srwatson * California, Berkeley and its contributors. 20100894Srwatson * 4. Neither the name of the University nor the names of its contributors 21100894Srwatson * may be used to endorse or promote products derived from this software 22100894Srwatson * without specific prior written permission. 23100894Srwatson * 24100894Srwatson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25100894Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26100894Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27100894Srwatson * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28100894Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29100894Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30100894Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31100894Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32100894Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33100894Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34100894Srwatson * SUCH DAMAGE. 35100894Srwatson * 36100894Srwatson * $Id: locate.bigram.c,v 1.4 1996/08/22 18:46:11 wosch Exp $ 37100894Srwatson */ 38100894Srwatson 39100894Srwatson#ifndef lint 40100894Srwatsonstatic char copyright[] = 41100894Srwatson"@(#) Copyright (c) 1989, 1993\n\ 42100894Srwatson The Regents of the University of California. All rights reserved.\n"; 43100894Srwatson#endif /* not lint */ 44100894Srwatson 45100894Srwatson#ifndef lint 46100894Srwatsonstatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 47100894Srwatson#endif /* not lint */ 48100894Srwatson 49101173Srwatson/* 50100894Srwatson * bigram < sorted_file_names | sort -nr | 51100979Srwatson * awk 'NR <= 128 { printf $2 }' > bigrams 52100979Srwatson * 53100979Srwatson * List bigrams for 'updatedb' script. 54100979Srwatson * Use 'code' to encode a file using this output. 55100979Srwatson */ 56100979Srwatson 57101712Srwatson#include <stdio.h> 58100979Srwatson#include <sys/param.h> /* for MAXPATHLEN */ 59100979Srwatson#include "locate.h" 60100894Srwatson 61100894Srwatsonu_char buf1[MAXPATHLEN] = " "; 62100979Srwatsonu_char buf2[MAXPATHLEN]; 63100979Srwatsonu_int bigram[UCHAR_MAX][UCHAR_MAX]; 64100979Srwatson 65100979Srwatsonint 66100979Srwatsonmain(void) 67100979Srwatson{ 68100979Srwatson register u_char *cp; 69100979Srwatson register u_char *oldpath = buf1, *path = buf2; 70100979Srwatson register u_int i, j; 71100894Srwatson 72100979Srwatson while (fgets(path, sizeof(buf2), stdin) != NULL) { 73100979Srwatson 74100979Srwatson /* skip empty lines */ 75100979Srwatson if (*path == '\n') 76100979Srwatson continue; 77100979Srwatson 78100979Srwatson /* Squelch characters that would botch the decoding. */ 79100979Srwatson for (cp = path; *cp != NULL; cp++) { 80100979Srwatson /* chop newline */ 81100979Srwatson if (*cp == '\n') 82100979Srwatson *cp = NULL; 83100979Srwatson /* range */ 84100979Srwatson else if (*cp < ASCII_MIN || *cp > ASCII_MAX) 85100979Srwatson *cp = '?'; 86100979Srwatson } 87100979Srwatson 88100979Srwatson /* skip longest common prefix */ 89100979Srwatson for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++); 90100979Srwatson 91101712Srwatson while (*cp != NULL && *(cp+1) != NULL) { 92101712Srwatson bigram[*cp][*(cp+1)]++; 93101712Srwatson cp += 2; 94101712Srwatson } 95101712Srwatson 96101712Srwatson /* swap pointers */ 97101712Srwatson if (path == buf1) { 98100979Srwatson path = buf2; 99100979Srwatson oldpath = buf1; 100100979Srwatson } else { 101100979Srwatson path = buf1; 102100979Srwatson oldpath = buf2; 103100979Srwatson } 104100979Srwatson } 105100979Srwatson 106100979Srwatson /* output, (paranoid) boundary check */ 107100979Srwatson for (i = ASCII_MIN; i <= ASCII_MAX; i++) 108100979Srwatson for (j = ASCII_MIN; j <= ASCII_MAX; j++) 109100979Srwatson if (bigram[i][j] != 0) 110100979Srwatson printf("%4u %c%c\n", bigram[i][j], i, j); 111100979Srwatson 112100979Srwatson exit(0); 113100979Srwatson} 114100979Srwatson