117980Swosch/* 217980Swosch * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 317980Swosch * Copyright (c) 1989, 1993 417980Swosch * The Regents of the University of California. All rights reserved. 517980Swosch * 617980Swosch * This code is derived from software contributed to Berkeley by 717980Swosch * James A. Woods. 817980Swosch * 917980Swosch * Redistribution and use in source and binary forms, with or without 1017980Swosch * modification, are permitted provided that the following conditions 1117980Swosch * are met: 1217980Swosch * 1. Redistributions of source code must retain the above copyright 1317980Swosch * notice, this list of conditions and the following disclaimer. 1417980Swosch * 2. Redistributions in binary form must reproduce the above copyright 1517980Swosch * notice, this list of conditions and the following disclaimer in the 1617980Swosch * documentation and/or other materials provided with the distribution. 1717980Swosch * 3. All advertising materials mentioning features or use of this software 1817980Swosch * must display the following acknowledgement: 1917980Swosch * This product includes software developed by the University of 2017980Swosch * California, Berkeley and its contributors. 2117980Swosch * 4. Neither the name of the University nor the names of its contributors 2217980Swosch * may be used to endorse or promote products derived from this software 2317980Swosch * without specific prior written permission. 2417980Swosch * 2517980Swosch * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2617980Swosch * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2717980Swosch * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2817980Swosch * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2917980Swosch * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3017980Swosch * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3117980Swosch * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3217980Swosch * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3317980Swosch * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3417980Swosch * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3517980Swosch * SUCH DAMAGE. 3617980Swosch * 3750477Speter * $FreeBSD$ 3817980Swosch */ 3917980Swosch 4017980Swosch 4117980Swosch#ifndef _LOCATE_STATISTIC_ 4217980Swosch#define _LOCATE_STATISTIC_ 4317980Swosch 4417980Swoschvoid 4517980Swoschstatistic (fp, path_fcodes) 4617980Swosch FILE *fp; /* open database */ 4717980Swosch char *path_fcodes; /* for error message */ 4817980Swosch{ 4918905Swosch register int lines, chars, size, big, zwerg; 5017980Swosch register u_char *p, *s; 5117980Swosch register int c; 5218905Swosch int count, umlaut; 5317980Swosch u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 5417980Swosch 5517980Swosch for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 5617980Swosch p[c] = check_bigram_char(getc(fp)); 5717980Swosch s[c] = check_bigram_char(getc(fp)); 5817980Swosch } 5917980Swosch 6018905Swosch lines = chars = big = zwerg = umlaut = 0; 6117980Swosch size = NBG + NBG; 6217980Swosch 6317980Swosch for (c = getc(fp), count = 0; c != EOF; size++) { 6417980Swosch if (c == SWITCH) { 6517980Swosch count += getwf(fp) - OFFSET; 6617980Swosch size += sizeof(int); 6718905Swosch zwerg++; 6817980Swosch } else 6917980Swosch count += c - OFFSET; 7017980Swosch 7117980Swosch for (p = path + count; (c = getc(fp)) > SWITCH; size++) 7218905Swosch if (c < PARITY) { 7318905Swosch if (c == UMLAUT) { 7418905Swosch c = getc(fp); 7518905Swosch size++; 7618905Swosch umlaut++; 7718905Swosch } 7817980Swosch p++; 7918905Swosch } else { 8018905Swosch /* bigram char */ 8117980Swosch big++; 8217980Swosch p += 2; 8317980Swosch } 8417980Swosch 8517980Swosch p++; 8617980Swosch lines++; 8717980Swosch chars += (p - path); 8817980Swosch } 8917980Swosch 9017980Swosch (void)printf("\nDatabase: %s\n", path_fcodes); 9117980Swosch (void)printf("Compression: Front: %2.2f%%, ", 9229739Swosch (size + big - (2 * NBG)) / (chars / (float)100)); 9329739Swosch (void)printf("Bigram: %2.2f%%, ", (size - big) / (size / (float)100)); 9418905Swosch (void)printf("Total: %2.2f%%\n", 9529739Swosch (size - (2 * NBG)) / (chars / (float)100)); 9617980Swosch (void)printf("Filenames: %d, ", lines); 9718905Swosch (void)printf("Characters: %d, ", chars); 9818905Swosch (void)printf("Database size: %d\n", size); 9918905Swosch (void)printf("Bigram characters: %d, ", big); 10018905Swosch (void)printf("Integers: %d, ", zwerg); 10118905Swosch (void)printf("8-Bit characters: %d\n", umlaut); 10217980Swosch 10317980Swosch} 10417980Swosch#endif /* _LOCATE_STATISTIC_ */ 10517980Swosch 106153197Sdesextern char separator; 10717980Swosch 10817980Swoschvoid 10917980Swosch#ifdef FF_MMAP 11017980Swosch 11117980Swosch 11217980Swosch#ifdef FF_ICASE 11317980Swoschfastfind_mmap_icase 11417980Swosch#else 11517980Swoschfastfind_mmap 11618905Swosch#endif /* FF_ICASE */ 11717980Swosch(pathpart, paddr, len, database) 11817980Swosch char *pathpart; /* search string */ 11917980Swosch caddr_t paddr; /* mmap pointer */ 12017980Swosch int len; /* length of database */ 12117980Swosch char *database; /* for error message */ 12217980Swosch 12317980Swosch 12417980Swosch#else /* MMAP */ 12517980Swosch 12617980Swosch 12717980Swosch#ifdef FF_ICASE 12817980Swoschfastfind_icase 12918905Swosch#else 13017980Swoschfastfind 13117980Swosch#endif /* FF_ICASE */ 13217980Swosch 13317980Swosch(fp, pathpart, database) 13417980Swosch FILE *fp; /* open database */ 13517980Swosch char *pathpart; /* search string */ 13617980Swosch char *database; /* for error message */ 13717980Swosch 13817980Swosch 13917980Swosch#endif /* MMAP */ 14017980Swosch 14117980Swosch{ 14217980Swosch register u_char *p, *s, *patend, *q, *foundchar; 14317980Swosch register int c, cc; 14417980Swosch int count, found, globflag; 14517980Swosch u_char *cutoff; 14617980Swosch u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 14717980Swosch 14817980Swosch#ifdef FF_ICASE 14917980Swosch /* use a lookup table for case insensitive search */ 15018905Swosch u_char table[UCHAR_MAX + 1]; 15117980Swosch 15217980Swosch tolower_word(pathpart); 15318905Swosch#endif /* FF_ICASE*/ 15417980Swosch 15517980Swosch /* init bigram table */ 15617980Swosch#ifdef FF_MMAP 15717980Swosch for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 15817980Swosch p[c] = check_bigram_char(*paddr++); 15917980Swosch s[c] = check_bigram_char(*paddr++); 16017980Swosch } 16117980Swosch#else 16217980Swosch for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 16317980Swosch p[c] = check_bigram_char(getc(fp)); 16417980Swosch s[c] = check_bigram_char(getc(fp)); 16517980Swosch } 16618905Swosch#endif /* FF_MMAP */ 16717980Swosch 16817980Swosch /* find optimal (last) char for searching */ 16918829Swosch for (p = pathpart; *p != '\0'; p++) 170229403Sed if (strchr(LOCATE_REG, *p) != NULL) 17118829Swosch break; 17218829Swosch 17318829Swosch if (*p == '\0') 17418829Swosch globflag = 0; 17518829Swosch else 17618829Swosch globflag = 1; 17718829Swosch 17817980Swosch p = pathpart; 17917980Swosch patend = patprep(p); 18017980Swosch cc = *patend; 18117980Swosch 18217980Swosch#ifdef FF_ICASE 18317980Swosch /* set patend char to true */ 18420406Swosch for (c = 0; c < UCHAR_MAX + 1; c++) 18520406Swosch table[c] = 0; 18620406Swosch 18717980Swosch table[TOLOWER(*patend)] = 1; 18817980Swosch table[toupper(*patend)] = 1; 18918905Swosch#endif /* FF_ICASE */ 19017980Swosch 19117980Swosch 19217980Swosch /* main loop */ 19317980Swosch found = count = 0; 19417980Swosch foundchar = 0; 19517980Swosch 19617980Swosch#ifdef FF_MMAP 19718905Swosch c = (u_char)*paddr++; len--; 19818905Swosch for (; len > 0; ) { 19917980Swosch#else 20018905Swosch c = getc(fp); 20118905Swosch for (; c != EOF; ) { 20218905Swosch#endif /* FF_MMAP */ 20317980Swosch 20417980Swosch /* go forward or backward */ 20517980Swosch if (c == SWITCH) { /* big step, an integer */ 20617980Swosch#ifdef FF_MMAP 20717980Swosch count += getwm(paddr) - OFFSET; 20817980Swosch len -= INTSIZE; paddr += INTSIZE; 20917980Swosch#else 21017980Swosch count += getwf(fp) - OFFSET; 21118905Swosch#endif /* FF_MMAP */ 21217980Swosch } else { /* slow step, =< 14 chars */ 21317980Swosch count += c - OFFSET; 21417980Swosch } 21517980Swosch 216190656Sdelphij if (count < 0 || count > MAXPATHLEN) 217190656Sdelphij errx(1, "corrupted database: %s", database); 21817980Swosch /* overlay old path */ 21917980Swosch p = path + count; 22017980Swosch foundchar = p - 1; 22118905Swosch 22217980Swosch#ifdef FF_MMAP 22370716Swosch for (; len > 0;) { 22418905Swosch c = (u_char)*paddr++; 22518905Swosch len--; 22617980Swosch#else 22770716Swosch for (;;) { 22818905Swosch c = getc(fp); 22918905Swosch#endif /* FF_MMAP */ 23018905Swosch /* 23118905Swosch * == UMLAUT: 8 bit char followed 23218905Swosch * <= SWITCH: offset 23318905Swosch * >= PARITY: bigram 23418905Swosch * rest: single ascii char 23518905Swosch * 23618905Swosch * offset < SWITCH < UMLAUT < ascii < PARITY < bigram 23718905Swosch */ 23817980Swosch if (c < PARITY) { 23918905Swosch if (c <= UMLAUT) { 24018905Swosch if (c == UMLAUT) { 24118905Swosch#ifdef FF_MMAP 24218905Swosch c = (u_char)*paddr++; 24318905Swosch len--; 24418905Swosch#else 24518905Swosch c = getc(fp); 24618905Swosch#endif /* FF_MMAP */ 24718905Swosch 24818905Swosch } else 24918905Swosch break; /* SWITCH */ 25018905Swosch } 25117980Swosch#ifdef FF_ICASE 25217980Swosch if (table[c]) 25317980Swosch#else 25417980Swosch if (c == cc) 25518905Swosch#endif /* FF_ICASE */ 25617980Swosch foundchar = p; 25717980Swosch *p++ = c; 25817980Swosch } 25917980Swosch else { 26017980Swosch /* bigrams are parity-marked */ 26117980Swosch TO7BIT(c); 26217980Swosch 26317980Swosch#ifndef FF_ICASE 26417980Swosch if (bigram1[c] == cc || 26517980Swosch bigram2[c] == cc) 26617980Swosch#else 26717980Swosch 26817980Swosch if (table[bigram1[c]] || 26917980Swosch table[bigram2[c]]) 27018905Swosch#endif /* FF_ICASE */ 27117980Swosch foundchar = p + 1; 27217980Swosch 27317980Swosch *p++ = bigram1[c]; 27417980Swosch *p++ = bigram2[c]; 27517980Swosch } 27618905Swosch } 27717980Swosch 27817980Swosch if (found) { /* previous line matched */ 27917980Swosch cutoff = path; 28017980Swosch *p-- = '\0'; 28117980Swosch foundchar = p; 28217980Swosch } else if (foundchar >= path + count) { /* a char matched */ 28317980Swosch *p-- = '\0'; 28417980Swosch cutoff = path + count; 28517980Swosch } else /* nothing to do */ 28617980Swosch continue; 28717980Swosch 28817980Swosch found = 0; 28917980Swosch for (s = foundchar; s >= cutoff; s--) { 29017980Swosch if (*s == cc 29117980Swosch#ifdef FF_ICASE 29217980Swosch || TOLOWER(*s) == cc 29318905Swosch#endif /* FF_ICASE */ 29417980Swosch ) { /* fast first char check */ 29517980Swosch for (p = patend - 1, q = s - 1; *p != '\0'; 29617980Swosch p--, q--) 29717980Swosch if (*q != *p 29817980Swosch#ifdef FF_ICASE 29917980Swosch && TOLOWER(*q) != *p 30018905Swosch#endif /* FF_ICASE */ 30117980Swosch ) 30217980Swosch break; 30317980Swosch if (*p == '\0') { /* fast match success */ 30417980Swosch found = 1; 30519060Swosch if (!globflag || 30619060Swosch#ifndef FF_ICASE 30719060Swosch !fnmatch(pathpart, path, 0)) 30819060Swosch#else 30919060Swosch !fnmatch(pathpart, path, 31019134Sache FNM_CASEFOLD)) 31119060Swosch#endif /* !FF_ICASE */ 31219060Swosch { 31317980Swosch if (f_silent) 31417980Swosch counter++; 31517980Swosch else if (f_limit) { 31617980Swosch counter++; 31717980Swosch if (f_limit >= counter) 318153197Sdes (void)printf("%s%c",path,separator); 31927574Scharnier else 32027574Scharnier errx(0, "[show only %d lines]", counter - 1); 32117980Swosch } else 322153197Sdes (void)printf("%s%c",path,separator); 32317980Swosch } 32417980Swosch break; 32517980Swosch } 32617980Swosch } 32717980Swosch } 32817980Swosch } 32917980Swosch} 330