/* locate.bigram.c revision 50477 */
1193323Sed/* 2193323Sed * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3193323Sed * Copyright (c) 1989, 1993 4193323Sed * The Regents of the University of California. All rights reserved. 5193323Sed * 6193323Sed * This code is derived from software contributed to Berkeley by 7193323Sed * James A. Woods. 8193323Sed * 9193323Sed * Redistribution and use in source and binary forms, with or without 10193323Sed * modification, are permitted provided that the following conditions 11193323Sed * are met: 12193323Sed * 1. Redistributions of source code must retain the above copyright 13193323Sed * notice, this list of conditions and the following disclaimer. 14193323Sed * 2. Redistributions in binary form must reproduce the above copyright 15193323Sed * notice, this list of conditions and the following disclaimer in the 16193323Sed * documentation and/or other materials provided with the distribution. 17193323Sed * 3. All advertising materials mentioning features or use of this software 18193323Sed * must display the following acknowledgement: 19193323Sed * This product includes software developed by the University of 20193323Sed * California, Berkeley and its contributors. 21193323Sed * 4. Neither the name of the University nor the names of its contributors 22203954Srdivacky * may be used to endorse or promote products derived from this software 23193323Sed * without specific prior written permission. 24193323Sed * 25193323Sed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26193323Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27193323Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28193323Sed * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29193323Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30193323Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31193323Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32193323Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33193323Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34193323Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35193323Sed * SUCH DAMAGE. 36193323Sed * 37193323Sed * $FreeBSD: head/usr.bin/locate/bigram/locate.bigram.c 50477 1999-08-28 01:08:13Z peter $ 38193323Sed */ 39193323Sed 40193323Sed#ifndef lint 41193323Sedstatic char copyright[] = 42193323Sed"@(#) Copyright (c) 1989, 1993\n\ 43193323Sed The Regents of the University of California. All rights reserved.\n"; 44193323Sed#endif /* not lint */ 45193323Sed 46193323Sed#ifndef lint 47193323Sedstatic char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; 48193323Sed#endif /* not lint */ 49193323Sed 50193323Sed/* 51193323Sed * bigram < sorted_file_names | sort -nr | 52193323Sed * awk 'NR <= 128 { printf $2 }' > bigrams 53193323Sed * 54193323Sed * List bigrams for 'updatedb' script. 55193323Sed * Use 'code' to encode a file using this output. 
56193323Sed */ 57193323Sed 58193323Sed#include <stdio.h> 59193323Sed#include <sys/param.h> /* for MAXPATHLEN */ 60193323Sed#include "locate.h" 61193323Sed 62193323Sedu_char buf1[MAXPATHLEN] = " "; 63193323Sedu_char buf2[MAXPATHLEN]; 64193323Sedu_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1]; 65193323Sed 66193323Sedint 67193323Sedmain(void) 68193323Sed{ 69193323Sed register u_char *cp; 70193323Sed register u_char *oldpath = buf1, *path = buf2; 71193323Sed register u_int i, j; 72193323Sed 73193323Sed while (fgets(path, sizeof(buf2), stdin) != NULL) { 74193323Sed 75193323Sed /* 76193323Sed * We don't need remove newline character '\n'. 77193323Sed * '\n' is less than ASCII_MIN and will be later 78193323Sed * ignored at output. 79193323Sed */ 80193323Sed 81193323Sed 82193323Sed /* skip longest common prefix */ 83193323Sed for (cp = path; *cp == *oldpath; cp++, oldpath++) 84193323Sed if (*cp == '\0') 85193323Sed break; 86193323Sed 87193323Sed while (*cp != '\0' && *(cp + 1) != '\0') { 88193323Sed bigram[(u_char)*cp][(u_char)*(cp + 1)]++; 89198090Srdivacky cp += 2; 90193323Sed } 91193323Sed 92193323Sed /* swap pointers */ 93193323Sed if (path == buf1) { 94193323Sed path = buf2; 95193323Sed oldpath = buf1; 96193323Sed } else { 97193323Sed path = buf1; 98193323Sed oldpath = buf2; 99193323Sed } 100193323Sed } 101193323Sed 102193323Sed /* output, boundary check */ 103193323Sed for (i = ASCII_MIN; i <= ASCII_MAX; i++) 104193323Sed for (j = ASCII_MIN; j <= ASCII_MAX; j++) 105193323Sed if (bigram[i][j] != 0) 106193323Sed (void)printf("%4u %c%c\n", bigram[i][j], i, j); 107198090Srdivacky 108193323Sed exit(0); 109193323Sed} 110193323Sed