locate.bigram.c revision 17776
1139804Simp/* 21541Srgrimes * Copyright (c) 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * James A. Woods. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2922521Sdyson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3050477Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3222521Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3322521Sdyson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34159082Sdds * SUCH DAMAGE. 35159082Sdds * 36159082Sdds * $Id$ 37159082Sdds */ 38159082Sdds 39159082Sdds#ifndef lint 4022521Sdysonstatic char copyright[] = 4122521Sdyson"@(#) Copyright (c) 1989, 1993\n\ 4254444Seivind The Regents of the University of California. All rights reserved.\n"; 4354444Seivind#endif /* not lint */ 4454444Seivind 4554444Seivind#ifndef lint 4654444Seivindstatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 4745058Seivind#endif /* not lint */ 4822521Sdyson 4922521Sdyson/* 5022521Sdyson * bigram < sorted_file_names | sort -nr | 5122521Sdyson * awk 'NR <= 128 { printf $2 }' > bigrams 52116615Sse * 53116615Sse * List bigrams for 'updatedb' script. 54116615Sse * Use 'code' to encode a file using this output. 55159082Sdds */ 56159082Sdds 57159082Sdds#include <stdio.h> 58116615Sse#include <sys/param.h> /* for MAXPATHLEN */ 59116615Sse#include <string.h> /* memchr */ 60116615Sse#include "locate.h" 6122521Sdyson 6251679Seivindu_char buf1[MAXPATHLEN] = " "; 6351679Seivindu_char buf2[MAXPATHLEN]; 6483366Sjulianu_int bigram[UCHAR_MAX][UCHAR_MAX]; 6551679Seivind 6651679Seivindint 67159082Sddsmain(void) 68159082Sdds{ 69159082Sdds register u_char *cp; 70159082Sdds register u_char *oldpath = buf1, *path = buf2; 71159082Sdds register u_int i, j; 7222521Sdyson 7322521Sdyson while (fgets(path, sizeof(buf2), stdin) != NULL) { 7422521Sdyson 75159082Sdds /* skip empty lines */ 761541Srgrimes if (*path == '\n') 771541Srgrimes continue; 781541Srgrimes 791541Srgrimes /* Squelch characters that would botch the decoding. */ 801541Srgrimes for (cp = path; *cp != NULL; cp++) { 811541Srgrimes /* chop newline */ 82159082Sdds if (*cp == '\n') 83159082Sdds *cp = NULL; 84159082Sdds /* range */ 8528732Sphk else if (*cp < ASCII_MIN || *cp > ASCII_MAX) 86159082Sdds *cp = '?'; 8728732Sphk } 8828732Sphk 8928732Sphk /* skip longest common prefix */ 9028732Sphk for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++); 9128732Sphk 9228732Sphk while (*cp != NULL && *(cp+1) != NULL) { 93159082Sdds bigram[*cp][*(cp+1)]++; 94159082Sdds cp += 2; 95159082Sdds } 96159082Sdds 971541Srgrimes /* swap pointers */ 9835823Smsmith if (path == buf1) { 991541Srgrimes path = buf2; 1001541Srgrimes oldpath = buf1; 1011541Srgrimes } else { 1021541Srgrimes path = buf1; 1031541Srgrimes oldpath = buf2; 104159082Sdds } 105159082Sdds } 106159082Sdds 10722521Sdyson /* output, (paranoid) boundary check */ 10835823Smsmith for (i = ASCII_MIN; i <= ASCII_MAX; i++) 10922521Sdyson for (j = ASCII_MIN; j <= ASCII_MAX; j++) 11022521Sdyson if (bigram[i][j] != 0) 11122521Sdyson printf("%4u %c%c\n", bigram[i][j], i, j); 11222521Sdyson 113159082Sdds exit(0); 114159082Sdds} 115159082Sdds