locate.bigram.c revision 17776
1100894Srwatson/* 2100894Srwatson * Copyright (c) 1989, 1993 3100894Srwatson * The Regents of the University of California. All rights reserved. 4100894Srwatson * 5100894Srwatson * This code is derived from software contributed to Berkeley by 6100894Srwatson * James A. Woods. 7100894Srwatson * 8100894Srwatson * Redistribution and use in source and binary forms, with or without 9100894Srwatson * modification, are permitted provided that the following conditions 10100894Srwatson * are met: 11100894Srwatson * 1. Redistributions of source code must retain the above copyright 12100894Srwatson * notice, this list of conditions and the following disclaimer. 13100894Srwatson * 2. Redistributions in binary form must reproduce the above copyright 14100894Srwatson * notice, this list of conditions and the following disclaimer in the 15100894Srwatson * documentation and/or other materials provided with the distribution. 16100894Srwatson * 3. All advertising materials mentioning features or use of this software 17100894Srwatson * must display the following acknowledgement: 18100894Srwatson * This product includes software developed by the University of 19100894Srwatson * California, Berkeley and its contributors. 20100894Srwatson * 4. Neither the name of the University nor the names of its contributors 21100894Srwatson * may be used to endorse or promote products derived from this software 22100894Srwatson * without specific prior written permission. 23100894Srwatson * 24100894Srwatson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25100894Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26100894Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27100894Srwatson * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28100894Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29100894Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30100894Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31100894Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32100894Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33100894Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34100894Srwatson * SUCH DAMAGE. 35100894Srwatson * 36100894Srwatson * $Id$ 37100894Srwatson */ 38100894Srwatson 39100894Srwatson#ifndef lint 40100894Srwatsonstatic char copyright[] = 41100894Srwatson"@(#) Copyright (c) 1989, 1993\n\ 42100894Srwatson The Regents of the University of California. All rights reserved.\n"; 43100894Srwatson#endif /* not lint */ 44100894Srwatson 45100894Srwatson#ifndef lint 46100894Srwatsonstatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 47100894Srwatson#endif /* not lint */ 48100894Srwatson 49101173Srwatson/* 50100894Srwatson * bigram < sorted_file_names | sort -nr | 51100979Srwatson * awk 'NR <= 128 { printf $2 }' > bigrams 52100979Srwatson * 53100979Srwatson * List bigrams for 'updatedb' script. 54100979Srwatson * Use 'code' to encode a file using this output. 55100979Srwatson */ 56100979Srwatson 57101712Srwatson#include <stdio.h> 58100979Srwatson#include <sys/param.h> /* for MAXPATHLEN */ 59100979Srwatson#include <string.h> /* memchr */ 60100894Srwatson#include "locate.h" 61100894Srwatson 62100979Srwatsonu_char buf1[MAXPATHLEN] = " "; 63100979Srwatsonu_char buf2[MAXPATHLEN]; 64100979Srwatsonu_int bigram[UCHAR_MAX][UCHAR_MAX]; 65100979Srwatson 66100979Srwatsonint 67100979Srwatsonmain(void) 68100979Srwatson{ 69100979Srwatson register u_char *cp; 70100979Srwatson register u_char *oldpath = buf1, *path = buf2; 71100894Srwatson register u_int i, j; 72100979Srwatson 73100979Srwatson while (fgets(path, sizeof(buf2), stdin) != NULL) { 74100979Srwatson 75100979Srwatson /* skip empty lines */ 76100979Srwatson if (*path == '\n') 77100979Srwatson continue; 78100979Srwatson 79100979Srwatson /* Squelch characters that would botch the decoding. */ 80100979Srwatson for (cp = path; *cp != NULL; cp++) { 81100979Srwatson /* chop newline */ 82100979Srwatson if (*cp == '\n') 83100979Srwatson *cp = NULL; 84100979Srwatson /* range */ 85100979Srwatson else if (*cp < ASCII_MIN || *cp > ASCII_MAX) 86100979Srwatson *cp = '?'; 87100979Srwatson } 88100979Srwatson 89100979Srwatson /* skip longest common prefix */ 90100979Srwatson for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++); 91101712Srwatson 92101712Srwatson while (*cp != NULL && *(cp+1) != NULL) { 93101712Srwatson bigram[*cp][*(cp+1)]++; 94101712Srwatson cp += 2; 95101712Srwatson } 96101712Srwatson 97101712Srwatson /* swap pointers */ 98100979Srwatson if (path == buf1) { 99100979Srwatson path = buf2; 100100979Srwatson oldpath = buf1; 101100979Srwatson } else { 102100979Srwatson path = buf1; 103100979Srwatson oldpath = buf2; 104100979Srwatson } 105100979Srwatson } 106100979Srwatson 107100979Srwatson /* output, (paranoid) boundary check */ 108100979Srwatson for (i = ASCII_MIN; i <= ASCII_MAX; i++) 109100979Srwatson for (j = ASCII_MIN; j <= ASCII_MAX; j++) 110100979Srwatson if (bigram[i][j] != 0) 111100979Srwatson printf("%4u %c%c\n", bigram[i][j], i, j); 112100979Srwatson 113100979Srwatson exit(0); 114100979Srwatson} 115100979Srwatson