Deleted Added
full compact
locate.bigram.c (17592) locate.bigram.c (17776)
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 18 unchanged lines hidden (view full) ---

27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
1/*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 18 unchanged lines hidden (view full) ---

27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * $Id$
35 */
36
37#ifndef lint
38static char copyright[] =
39"@(#) Copyright (c) 1989, 1993\n\
40 The Regents of the University of California. All rights reserved.\n";
41#endif /* not lint */
42
43#ifndef lint
44static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
45#endif /* not lint */
46
47/*
37 */
38
39#ifndef lint
40static char copyright[] =
41"@(#) Copyright (c) 1989, 1993\n\
42 The Regents of the University of California. All rights reserved.\n";
43#endif /* not lint */
44
45#ifndef lint
46static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
47#endif /* not lint */
48
49/*
48 * bigram < text > bigrams
50 * bigram < sorted_file_names | sort -nr |
51 * awk 'NR <= 128 { printf $2 }' > bigrams
49 *
50 * List bigrams for 'updatedb' script.
51 * Use 'code' to encode a file using this output.
52 */
53
54#include <stdio.h>
55#include <sys/param.h> /* for MAXPATHLEN */
56#include <string.h> /* memchr */
57#include "locate.h"
58
59u_char buf1[MAXPATHLEN] = " ";
60u_char buf2[MAXPATHLEN];
52 *
53 * List bigrams for 'updatedb' script.
54 * Use 'code' to encode a file using this output.
55 */
56
57#include <stdio.h>
58#include <sys/param.h> /* for MAXPATHLEN */
59#include <string.h> /* memchr */
60#include "locate.h"
61
62u_char buf1[MAXPATHLEN] = " ";
63u_char buf2[MAXPATHLEN];
61unsigned int bigram[UCHAR_MAX][UCHAR_MAX];
64u_int bigram[UCHAR_MAX][UCHAR_MAX];
62
65
63
64void main ( )
66int
67main(void)
65{
66 register u_char *cp;
67 register u_char *oldpath = buf1, *path = buf2;
68{
69 register u_char *cp;
70 register u_char *oldpath = buf1, *path = buf2;
68 register int i, j;
71 register u_int i, j;
69
72
70 /* init bigram buffer */
71 for (i = 0; i < UCHAR_MAX; i++)
72 for (j = 0; j < UCHAR_MAX; j++)
73 bigram[i][j] = 0;
73 while (fgets(path, sizeof(buf2), stdin) != NULL) {
74
74
75 while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) {
76
77 /* skip empty lines */
78 if (*path == '\n')
79 continue;
80
81 /* Squelch characters that would botch the decoding. */
82 for (cp = path; *cp != NULL; cp++) {
83 /* chop newline */
84 if (*cp == '\n')
85 *cp = NULL;
86 /* range */
87 else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
88 *cp = '?';
89 }
90
75 /* skip empty lines */
76 if (*path == '\n')
77 continue;
78
79 /* Squelch characters that would botch the decoding. */
80 for (cp = path; *cp != NULL; cp++) {
81 /* chop newline */
82 if (*cp == '\n')
83 *cp = NULL;
84 /* range */
85 else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
86 *cp = '?';
87 }
88
91
92 /* skip longest common prefix */
89 /* skip longest common prefix */
93 for (cp = path; *cp == *oldpath && *cp; cp++, oldpath++);
90 for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++);
94
91
95 /*
96 * output post-residue bigrams only
97 */
98
99 /* check later for boundary */
100 while ( *cp != NULL && *(cp + 1) != NULL ) {
92 while (*cp != NULL && *(cp+1) != NULL) {
101 bigram[*cp][*(cp+1)]++;
102 cp += 2;
103 }
104
93 bigram[*cp][*(cp+1)]++;
94 cp += 2;
95 }
96
105 if ( path == buf1 ) /* swap pointers */
106 path = buf2, oldpath = buf1;
107 else
108 path = buf1, oldpath = buf2;
97 /* swap pointers */
98 if (path == buf1) {
99 path = buf2;
100 oldpath = buf1;
101 } else {
102 path = buf1;
103 oldpath = buf2;
104 }
109 }
110
105 }
106
111 /* output, boundary check */
107 /* output, (paranoid) boundary check */
112 for (i = ASCII_MIN; i <= ASCII_MAX; i++)
113 for (j = ASCII_MIN; j <= ASCII_MAX; j++)
114 if (bigram[i][j] != 0)
108 for (i = ASCII_MIN; i <= ASCII_MAX; i++)
109 for (j = ASCII_MIN; j <= ASCII_MAX; j++)
110 if (bigram[i][j] != 0)
115 fprintf(stdout, "%4d %c%c\n",
116 bigram[i][j], i, j);
111 printf("%4u %c%c\n", bigram[i][j], i, j);
112
113 exit(0);
117}
114}