11590Srgrimes/* 218905Swosch * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 31590Srgrimes * Copyright (c) 1989, 1993 41590Srgrimes * The Regents of the University of California. All rights reserved. 51590Srgrimes * 61590Srgrimes * This code is derived from software contributed to Berkeley by 71590Srgrimes * James A. Woods. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 3. All advertising materials mentioning features or use of this software 181590Srgrimes * must display the following acknowledgement: 191590Srgrimes * This product includes software developed by the University of 201590Srgrimes * California, Berkeley and its contributors. 211590Srgrimes * 4. Neither the name of the University nor the names of its contributors 221590Srgrimes * may be used to endorse or promote products derived from this software 231590Srgrimes * without specific prior written permission. 241590Srgrimes * 251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351590Srgrimes * SUCH DAMAGE. 3617776Swosch * 3750477Speter * $FreeBSD$ 381590Srgrimes */ 391590Srgrimes 40209571Sgavin#if 0 411590Srgrimes#ifndef lint 421590Srgrimesstatic char copyright[] = 431590Srgrimes"@(#) Copyright (c) 1989, 1993\n\ 441590Srgrimes The Regents of the University of California. All rights reserved.\n"; 451590Srgrimes#endif /* not lint */ 461590Srgrimes 471590Srgrimes#ifndef lint 481590Srgrimesstatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 491590Srgrimes#endif /* not lint */ 50209571Sgavin#endif 511590Srgrimes 521590Srgrimes/* 5317776Swosch * bigram < sorted_file_names | sort -nr | 5417776Swosch * awk 'NR <= 128 { printf $2 }' > bigrams 558874Srgrimes * 561590Srgrimes * List bigrams for 'updatedb' script. 571590Srgrimes * Use 'code' to encode a file using this output. 581590Srgrimes */ 591590Srgrimes 601590Srgrimes#include <stdio.h> 61141563Sstefanf#include <stdlib.h> 621590Srgrimes#include <sys/param.h> /* for MAXPATHLEN */ 6317592Swosch#include "locate.h" 641590Srgrimes 6517592Swoschu_char buf1[MAXPATHLEN] = " "; 6617592Swoschu_char buf2[MAXPATHLEN]; 6718905Swoschu_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1]; 681590Srgrimes 6917776Swoschint 7017776Swoschmain(void) 711590Srgrimes{ 72209571Sgavin u_char *cp; 73209571Sgavin u_char *oldpath = buf1, *path = buf2; 74209571Sgavin u_int i, j; 751590Srgrimes 7617776Swosch while (fgets(path, sizeof(buf2), stdin) != NULL) { 7717592Swosch 7818300Swosch /* 7918300Swosch * We don't need remove newline character '\n'. 8018300Swosch * '\n' is less than ASCII_MIN and will be later 8118300Swosch * ignored at output. 8218300Swosch */ 8317592Swosch 8417592Swosch 851590Srgrimes /* skip longest common prefix */ 8618300Swosch for (cp = path; *cp == *oldpath; cp++, oldpath++) 8718300Swosch if (*cp == '\0') 8818300Swosch break; 8917592Swosch 9018300Swosch while (*cp != '\0' && *(cp + 1) != '\0') { 9118905Swosch bigram[(u_char)*cp][(u_char)*(cp + 1)]++; 9217592Swosch cp += 2; 931590Srgrimes } 9417592Swosch 9517776Swosch /* swap pointers */ 9617776Swosch if (path == buf1) { 9717776Swosch path = buf2; 9817776Swosch oldpath = buf1; 9917776Swosch } else { 10017776Swosch path = buf1; 10117776Swosch oldpath = buf2; 10217776Swosch } 1031590Srgrimes } 10417592Swosch 10518300Swosch /* output, boundary check */ 10617592Swosch for (i = ASCII_MIN; i <= ASCII_MAX; i++) 10717592Swosch for (j = ASCII_MIN; j <= ASCII_MAX; j++) 10817592Swosch if (bigram[i][j] != 0) 10918300Swosch (void)printf("%4u %c%c\n", bigram[i][j], i, j); 11017776Swosch 11117776Swosch exit(0); 1121590Srgrimes} 113