/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/*
 * Copyright (c) 1983, 1984 1985, 1986, 1987, 1988, Sun Microsystems, Inc.
 * All Rights Reserved.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <stdio.h>
#include <locale.h>
#include <assert.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdlib.h>
#include <unistd.h>

extern void err();
extern int newkeys();
extern int recopy();
extern void whash();

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * these files are named xxx.ia, xxx.ib, xxx.ic;
 * where xxx is taken from arg1 ("Index" when no name is given).
 * If the files exist they are updated.
 *
 * Options (each must be its own argument, parsed before the name):
 *	-hN	size of hash table (default 256)
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose: print key/hash/doc counts when done
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i f	input is on file f, not stdin
 *	-iu f	same, and unlink f once it has been read
 *
 * Returns 0 on completion.  err() is defined elsewhere; the surrounding
 * code is written as if it does not return, and every call here is
 * followed by "return (1)" to cover the case where it does.
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes;
	long keys;
	int iflong = 0;
	char *sortdir;
	const char *base;
	int len;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	/* Prefer /crp/tmp for sort's work files when it is read/writable. */
	sortdir = (access("/crp/tmp", R_OK | W_OK) == 0) ?
	    "/crp/tmp" : "/usr/tmp";

	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			/* Without this check a bare -i hands NULL to open(). */
			if (argc < 3) {
				err(gettext("Can't read input %s"), "");
				return (1);
			}
			/*
			 * Closing descriptor 0 first makes open() hand back
			 * descriptor 0, so the file becomes stdin.
			 */
			(void) close(0);
			if (open(argv[2], O_RDONLY) != 0) {
				err(gettext("Can't read input %s"), argv[2]);
				return (1);
			}
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			/* consume the file name as well as the flag */
			argc--;
			argv++;
			break;
		default:	/* unknown flags are silently ignored */
			break;
		}
		argc--;
		argv++;
	}

	/*
	 * Build the four output names.  All buffers are the same size and
	 * all suffixes the same length, so checking the first is enough.
	 */
	base = (argc >= 2) ? argv[1] : "Index";
	len = snprintf(nma, sizeof (nma), "%s.ia", base);
	if (len < 0 || (size_t)len >= sizeof (nma)) {
		err(gettext("Index name %s is too long"), base);
		return (1);
	}
	(void) snprintf(nmb, sizeof (nmb), "%s.ib", base);
	(void) snprintf(nmc, sizeof (nmc), "%s.ic", base);
	(void) snprintf(nmd, sizeof (nmd), "%s.id", base);

	/* Scratch file for the new keys, either written directly or by sort. */
	(void) snprintf(tmpa, sizeof (tmpa), "junk%di", (int)getpid());
	if (pipein) {
		(void) snprintf(com, sizeof (com),
		    "/usr/bin/sort -T %s -o %s", sortdir, tmpa);
		fta = popen(com, "w");
	} else {	/* use tmp file */
		fta = fopen(tmpa, "w");
	}
	if (fta == NULL) {
		err(gettext("Can't get scratch file %s"), tmpa);
		return (1);
	}

	/*
	 * When appending, copy the old .ib contents to a second scratch
	 * file via recopy(), whose return value replaces nhash.  If there
	 * is no old .ib file, fall back to creating a new index.
	 */
	fb = NULL;
	if (appflg) {
		if ((fb = fopen(nmb, "r")) != NULL) {
			(void) snprintf(tmpb, sizeof (tmpb), "junk%dj",
			    (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL) {
				err(gettext("Can't get scratch file %s"),
				    tmpb);
				return (1);
			}
			/*
			 * NOTE(review): the .ia stream is handed over
			 * unchecked, as before; recopy() is presumably
			 * responsible for a NULL result - confirm.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			(void) fclose(ftb);
		} else {
			appflg = 0;
		}
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL) {
		err(gettext("Can't write %s"), nmc);
		return (1);
	}
	if (keepkey)
		fd = fopen(nmd, "w");

	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	(void) fclose(stdin);
	if (rmfile != NULL)
		(void) unlink(rmfile);

	/*
	 * A popen() stream must be closed with pclose() only: that is what
	 * waits for sort to finish writing tmpa.  Calling fclose() first, as
	 * the earlier code did, released the stream and then passed the
	 * stale pointer to pclose().
	 */
	if (pipein) {
		(void) pclose(fta);
	} else {
		(void) fclose(fta);
		(void) snprintf(com, sizeof (com), "sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa);
		(void) system(com);
	}

	/* Merge the sorted new keys with the recopied old ones. */
	if (appflg) {
		(void) snprintf(tmpc, sizeof (tmpc), "junk%dk",
		    (int)getpid());
		/* both names are in the current directory, so rename works */
		(void) rename(tmpa, tmpc);
		(void) snprintf(com, sizeof (com),
		    "sort -T %s -m %s %s -o %s", sortdir, tmpb, tmpc, tmpa);
		(void) system(com);
	}

	/*
	 * NOTE(review): the read handle on nmb opened in the append phase
	 * is not closed here before fb is reused - confirm whether
	 * recopy() disposes of it.
	 */
	fta = fopen(tmpa, "r");
	if (fta == NULL) {
		err(gettext("Can't get scratch file %s"), tmpa);
		return (1);
	}
	fa = fopen(nma, "w");
	if (fa == NULL) {
		err(gettext("Can't write %s"), nma);
		return (1);
	}
	fb = fopen(nmb, "w");
	if (fb == NULL) {
		err(gettext("Can't write %s"), nmb);
		return (1);
	}
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	(void) fclose(fta);

#ifndef D1
	(void) unlink(tmpa);
#endif
	if (appflg) {
		(void) unlink(tmpb);
		(void) unlink(tmpc);
	}
	if (chatty)
		(void) printf(
		    gettext("%ld key occurrences, %d hashes, %d docs\n"),
		    keys, hashes, docs);

	return (0);
}