inv1.c (0:68f95e015346) inv1.c (719:6c26331bc6b8)
1/*
2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
5
1/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2/* All Rights Reserved */
3
6/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
7/* All Rights Reserved */
8
4
5/*
6 * Copyright (c) 1980 Regents of the University of California.
7 * All rights reserved. The Berkeley software License Agreement
8 * specifies the terms and conditions for redistribution.
9 */
10
9/*
10 * Copyright (c) 1980 Regents of the University of California.
11 * All rights reserved. The Berkeley software License Agreement
12 * specifies the terms and conditions for redistribution.
13 */
14
11/*
12 * Copyright (c) 1983, 1984 1985, 1986, 1987, 1988, Sun Microsystems, Inc.
13 * All Rights Reserved.
14 */
15#pragma ident "%Z%%M% %I% %E% SMI"
15
16
16#pragma ident "%Z%%M% %I% %E% SMI"
17
18#include <stdio.h>
19#include <locale.h>
20#include <assert.h>
21
17#include <stdio.h>
18#include <locale.h>
19#include <assert.h>
20
extern void err();
extern int newkeys();
extern int recopy();
extern void whash();

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * these files are named xxx.ia, xxx.ib, xxx.ic;
 * where xxx is taken from arg1 ("Index" when no name is given).
 * If the files exist they are updated.
 *
 * Options (each must be its own argument, before the index name):
 *	-hN	  use N as the hash table size (default 256)
 *	-n	  create new files instead of appending
 *	-a	  append to existing files (default)
 *	-v	  print key/hash/document counts when done
 *	-d	  also write the keys to xxx.id
 *	-p	  pipe keys straight into sort instead of a temp file
 *	-i file	  read input from file instead of stdin
 *	-iu file  as -i, and unlink file once it has been read
 *
 * Returns 0 on completion.
 *
 * NOTE(review): err() is defined elsewhere.  The pre-existing call
 * sites keep using their result after calling it, so it presumably
 * does not return; the checks added here return 1 after calling it
 * in case it does.
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *unlink_path = NULL;	/* input file to delete (-iu) */
	char *base;
	int chatty = 0, docs, hashes;
	int len;
	long keys;
	int iflong = 0;
	char *sortdir;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/usr/tmp";
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h': /* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n': /* new, don't append */
			appflg = 0;
			break;
		case 'a': /* append to old file */
			appflg = 1;
			break;
		case 'v': /* verbose output */
			chatty = 1;
			break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p': /* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i': /* input is on file, not stdin */
			if (argc < 3) {
				/* no file name follows the option */
				err(gettext("Can't read input %s"), "");
				return (1);
			}
			/*
			 * Closing descriptor 0 first makes the open() below
			 * land on descriptor 0, so stdin reads the file.
			 */
			close(0);
			if (open(argv[2], 0) != 0)
				err(gettext("Can't read input %s"), argv[2]);
			if (argv[1][2] == 'u') /* unlink */
				unlink_path = argv[2];
			/* consume the file name argument as well */
			argc--;
			argv++;
			break;
		}
		argc--;
		argv++;
	}

	/*
	 * Build the four output names.  All buffers are the same size and
	 * all suffixes the same length, so checking one result is enough.
	 */
	base = (argc >= 2) ? argv[1] : "Index";
	len = snprintf(nma, sizeof (nma), "%s.ia", base);
	if (len < 0 || len >= (int)sizeof (nma)) {
		err(gettext("Index name too long: %s"), base);
		return (1);
	}
	(void) snprintf(nmb, sizeof (nmb), "%s.ib", base);
	(void) snprintf(nmc, sizeof (nmc), "%s.ic", base);
	(void) snprintf(nmd, sizeof (nmd), "%s.id", base);

	(void) snprintf(tmpa, sizeof (tmpa), "junk%di", (int)getpid());
	if (pipein) {
		(void) snprintf(com, sizeof (com),
		    "/usr/bin/sort -T %s -o %s", sortdir, tmpa);
		fta = popen(com, "w");
	} else { /* use tmp file */
		fta = fopen(tmpa, "w");
	}
	assert(fta != NULL);

	fb = NULL;
	if (appflg) {
		if ((fb = fopen(nmb, "r")) != NULL) {
			(void) snprintf(tmpb, sizeof (tmpb), "junk%dj",
			    (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err(gettext("Can't get scratch file %s"), tmpb);
			/*
			 * NOTE(review): the .ia stream is passed unchecked,
			 * as before; whether recopy() copes with NULL is
			 * not visible from here.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		} else {
			/* nothing to append to: behave like -n */
			appflg = 0;
		}
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL) {
		err(gettext("Can't write %s"), nmc);
		return (1);
	}
	if (keepkey)
		fd = fopen(nmd, "w");
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (unlink_path != NULL)
		unlink(unlink_path);

	if (pipein) {
		/*
		 * A popen() stream must be closed exactly once, with
		 * pclose(), which also waits for sort to finish writing.
		 */
		(void) pclose(fta);
	} else {
		(void) fclose(fta);
		(void) snprintf(com, sizeof (com), "sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg) {
		/* merge the old (recopied) keys with the newly sorted ones */
		(void) snprintf(tmpc, sizeof (tmpc), "junk%dk", (int)getpid());
		(void) snprintf(com, sizeof (com), "mv %s %s", tmpa, tmpc);
		system(com);
		(void) snprintf(com, sizeof (com), "sort -T %s -m %s %s -o %s",
		    sortdir, tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL) {
		err(gettext("Can't read %s"), tmpa);
		return (1);
	}
	fa = fopen(nma, "w");
	if (fa == NULL) {
		err(gettext("Can't write %s"), nma);
		return (1);
	}
	fb = fopen(nmb, "w");
	if (fb == NULL) {
		err(gettext("Can't write %s"), nmb);
		return (1);
	}
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
#ifndef D1
	unlink(tmpa);
#endif
	if (appflg) {
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf(gettext("%ld key occurrences, %d hashes, %d docs\n"),
		    keys, hashes, docs);

	/* fa, fb and fc are not closed here; they are flushed at exit */
	return (0);
}