strfile.c revision 81586
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Ken Arnold. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41#endif /* not lint */ 42 43#ifndef lint 44#if 0 45static const char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; 46#else 47static const char rcsid[] = 48 "$FreeBSD: head/games/fortune/strfile/strfile.c 81586 2001-08-13 14:06:34Z ru $"; 49#endif 50#endif /* not lint */ 51 52# include <sys/param.h> 53# include <stdio.h> 54# include <stdlib.h> 55# include <ctype.h> 56# include <string.h> 57# include <time.h> 58# include <locale.h> 59# include <unistd.h> 60# include "strfile.h" 61 62/* 63 * This program takes a file composed of strings separated by 64 * lines starting with two consecutive delimiting character (default 65 * character is '%') and creates another file which consists of a table 66 * describing the file (structure from "strfile.h"), a table of seek 67 * pointers to the start of the strings, and the strings, each terminated 68 * by a null byte. Usage: 69 * 70 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 71 * 72 * C - Allow comments marked by a double delimiter at line's beginning 73 * c - Change delimiting character from '%' to 'C' 74 * s - Silent. Give no summary of data processed at the end of 75 * the run. 76 * o - order the strings in alphabetic order 77 * i - if ordering, ignore case 78 * r - randomize the order of the strings 79 * x - set rotated bit 80 * 81 * Ken Arnold Sept. 7, 1978 -- 82 * 83 * Added ordering options. 84 */ 85 86# define TRUE 1 87# define FALSE 0 88 89# define STORING_PTRS (Oflag || Rflag) 90# define CHUNKSIZE 512 91 92# define ALLOC(ptr,sz) { \ 93 if (ptr == NULL) \ 94 ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \ 95 else if (((sz) + 1) % CHUNKSIZE == 0) \ 96 ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \ 97 if (ptr == NULL) { \ 98 fprintf(stderr, "out of space\n"); \ 99 exit(1); \ 100 } \ 101 } 102 103#ifdef NO_VOID 104# define void char 105#endif 106 107typedef struct { 108 char first; 109 long pos; 110} STR; 111 112char *Infile = NULL, /* input file name */ 113 Outfile[MAXPATHLEN] = "", /* output file name */ 114 Delimch = '%'; /* delimiting character */ 115 116int Cflag = FALSE; /* embedded comments */ 117int Sflag = FALSE; /* silent run flag */ 118int Oflag = FALSE; /* ordering flag */ 119int Iflag = FALSE; /* ignore case flag */ 120int Rflag = FALSE; /* randomize order flag */ 121int Xflag = FALSE; /* set rotated bit */ 122long Num_pts = 0; /* number of pointers/strings */ 123 124long *Seekpts; 125 126FILE *Sort_1, *Sort_2; /* pointers for sorting */ 127 128STRFILE Tbl; /* statistics table */ 129 130STR *Firstch; /* first chars of each string */ 131 132void add_offset __P((FILE *, long)); 133int cmp_str __P((const void *, const void *)); 134static int collate_range_cmp __P((int, int)); 135void do_order __P((void)); 136void getargs __P((int, char **)); 137void randomize __P((void)); 138void usage __P((void)); 139 140/* 141 * main: 142 * Drive the sucker. There are two main modes -- either we store 143 * the seek pointers, if the table is to be sorted or randomized, 144 * or we write the pointer directly to the file, if we are to stay 145 * in file order. If the former, we allocate and re-allocate in 146 * CHUNKSIZE blocks; if the latter, we just write each pointer, 147 * and then seek back to the beginning to write in the table. 148 */ 149int main(ac, av) 150int ac; 151char **av; 152{ 153 char *sp, dc; 154 FILE *inf, *outf; 155 long last_off, length, pos, *p; 156 int first, cnt; 157 char *nsp; 158 STR *fp; 159 static char string[257]; 160 161 (void) setlocale(LC_ALL, ""); 162 163 getargs(ac, av); /* evalute arguments */ 164 dc = Delimch; 165 if ((inf = fopen(Infile, "r")) == NULL) { 166 perror(Infile); 167 exit(1); 168 } 169 170 if ((outf = fopen(Outfile, "w")) == NULL) { 171 perror(Outfile); 172 exit(1); 173 } 174 if (!STORING_PTRS) 175 (void) fseek(outf, (long) sizeof Tbl, 0); 176 177 /* 178 * Write the strings onto the file 179 */ 180 181 Tbl.str_longlen = 0; 182 Tbl.str_shortlen = ~((unsigned long) 0); 183 Tbl.str_delim = dc; 184 Tbl.str_version = VERSION; 185 first = Oflag; 186 add_offset(outf, ftell(inf)); 187 last_off = 0; 188 do { 189 sp = fgets(string, 256, inf); 190 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 191 pos = ftell(inf); 192 length = pos - last_off - (sp ? strlen(sp) : 0); 193 last_off = pos; 194 if (!length) 195 continue; 196 add_offset(outf, pos); 197 if (Tbl.str_longlen < length) 198 Tbl.str_longlen = length; 199 if (Tbl.str_shortlen > length) 200 Tbl.str_shortlen = length; 201 first = Oflag; 202 } 203 else if (first) { 204 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 205 continue; 206 ALLOC(Firstch, Num_pts); 207 fp = &Firstch[Num_pts - 1]; 208 if (Iflag && isupper((unsigned char)*nsp)) 209 fp->first = tolower((unsigned char)*nsp); 210 else 211 fp->first = *nsp; 212 fp->pos = Seekpts[Num_pts - 1]; 213 first = FALSE; 214 } 215 } while (sp != NULL); 216 217 /* 218 * write the tables in 219 */ 220 221 (void) fclose(inf); 222 Tbl.str_numstr = Num_pts - 1; 223 224 if (Cflag) 225 Tbl.str_flags |= STR_COMMENTS; 226 227 if (Oflag) 228 do_order(); 229 else if (Rflag) 230 randomize(); 231 232 if (Xflag) 233 Tbl.str_flags |= STR_ROTATED; 234 235 if (!Sflag) { 236 printf("\"%s\" created\n", Outfile); 237 if (Num_pts == 2) 238 puts("There was 1 string"); 239 else 240 printf("There were %ld strings\n", Num_pts - 1); 241 printf("Longest string: %lu byte%s\n", Tbl.str_longlen, 242 Tbl.str_longlen == 1 ? "" : "s"); 243 printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, 244 Tbl.str_shortlen == 1 ? "" : "s"); 245 } 246 247 rewind(outf); 248 Tbl.str_version = htonl(Tbl.str_version); 249 Tbl.str_numstr = htonl(Tbl.str_numstr); 250 Tbl.str_longlen = htonl(Tbl.str_longlen); 251 Tbl.str_shortlen = htonl(Tbl.str_shortlen); 252 Tbl.str_flags = htonl(Tbl.str_flags); 253 (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); 254 if (STORING_PTRS) { 255 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 256 *p = htonl(*p); 257 (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); 258 } 259 (void) fclose(outf); 260 exit(0); 261} 262 263/* 264 * This routine evaluates arguments from the command line 265 */ 266void getargs(argc, argv) 267int argc; 268char **argv; 269{ 270 int ch; 271 272 while ((ch = getopt(argc, argv, "Cc:iorsx")) != EOF) 273 switch(ch) { 274 case 'C': /* embedded comments */ 275 Cflag++; 276 break; 277 case 'c': /* new delimiting char */ 278 Delimch = *optarg; 279 if (!isascii(Delimch)) { 280 printf("bad delimiting character: '\\%o\n'", 281 (unsigned char)Delimch); 282 } 283 break; 284 case 'i': /* ignore case in ordering */ 285 Iflag++; 286 break; 287 case 'o': /* order strings */ 288 Oflag++; 289 break; 290 case 'r': /* randomize pointers */ 291 Rflag++; 292 break; 293 case 's': /* silent */ 294 Sflag++; 295 break; 296 case 'x': /* set the rotated bit */ 297 Xflag++; 298 break; 299 case '?': 300 default: 301 usage(); 302 } 303 argv += optind; 304 305 if (*argv) { 306 Infile = *argv; 307 if (*++argv) 308 (void) strcpy(Outfile, *argv); 309 } 310 if (!Infile) { 311 puts("No input file name"); 312 usage(); 313 } 314 if (*Outfile == '\0') { 315 (void) strcpy(Outfile, Infile); 316 (void) strcat(Outfile, ".dat"); 317 } 318} 319 320void usage() 321{ 322 (void) fprintf(stderr, 323 "strfile [-Ciorsx] [-c char] sourcefile [datafile]\n"); 324 exit(1); 325} 326 327/* 328 * add_offset: 329 * Add an offset to the list, or write it out, as appropriate. 330 */ 331void add_offset(fp, off) 332FILE *fp; 333long off; 334{ 335 long net; 336 337 if (!STORING_PTRS) { 338 net = htonl(off); 339 fwrite(&net, 1, sizeof net, fp); 340 } else { 341 ALLOC(Seekpts, Num_pts + 1); 342 Seekpts[Num_pts] = off; 343 } 344 Num_pts++; 345} 346 347/* 348 * do_order: 349 * Order the strings alphabetically (possibly ignoring case). 350 */ 351void do_order() 352{ 353 int i; 354 long *lp; 355 STR *fp; 356 357 Sort_1 = fopen(Infile, "r"); 358 Sort_2 = fopen(Infile, "r"); 359 qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); 360 i = Tbl.str_numstr; 361 lp = Seekpts; 362 fp = Firstch; 363 while (i--) 364 *lp++ = fp++->pos; 365 (void) fclose(Sort_1); 366 (void) fclose(Sort_2); 367 Tbl.str_flags |= STR_ORDERED; 368} 369 370static int collate_range_cmp (c1, c2) 371 int c1, c2; 372{ 373 static char s1[2], s2[2]; 374 int ret; 375 376 c1 &= UCHAR_MAX; 377 c2 &= UCHAR_MAX; 378 if (c1 == c2) 379 return (0); 380 s1[0] = c1; 381 s2[0] = c2; 382 if ((ret = strcoll(s1, s2)) != 0) 383 return (ret); 384 return (c1 - c2); 385} 386 387/* 388 * cmp_str: 389 * Compare two strings in the file 390 */ 391int cmp_str(s1, s2) 392const void *s1, *s2; 393{ 394 const STR *p1, *p2; 395 int c1, c2; 396 int n1, n2; 397 int r; 398 399# define SET_N(nf,ch) (nf = (ch == '\n')) 400# define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) 401 402 p1 = (const STR *) s1; 403 p2 = (const STR *) s2; 404 405 c1 = (unsigned char) p1->first; 406 c2 = (unsigned char) p2->first; 407 if ((r = collate_range_cmp(c1, c2)) != 0) 408 return r; 409 410 (void) fseek(Sort_1, p1->pos, 0); 411 (void) fseek(Sort_2, p2->pos, 0); 412 413 n1 = FALSE; 414 n2 = FALSE; 415 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) 416 SET_N(n1, c1); 417 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) 418 SET_N(n2, c2); 419 420 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 421 if (Iflag) { 422 if (isupper(c1)) 423 c1 = tolower(c1); 424 if (isupper(c2)) 425 c2 = tolower(c2); 426 } 427 if ((r = collate_range_cmp(c1, c2)) != 0) 428 return r; 429 SET_N(n1, c1); 430 SET_N(n2, c2); 431 c1 = getc(Sort_1); 432 c2 = getc(Sort_2); 433 } 434 if (IS_END(c1, n1)) 435 c1 = 0; 436 if (IS_END(c2, n2)) 437 c2 = 0; 438 return collate_range_cmp(c1, c2); 439} 440 441/* 442 * randomize: 443 * Randomize the order of the string table. We must be careful 444 * not to randomize across delimiter boundaries. All 445 * randomization is done within each block. 446 */ 447void randomize() 448{ 449 int cnt, i; 450 long tmp; 451 long *sp; 452 453 srandomdev(); 454 455 Tbl.str_flags |= STR_RANDOM; 456 cnt = Tbl.str_numstr; 457 458 /* 459 * move things around randomly 460 */ 461 462 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 463 i = random() % cnt; 464 tmp = sp[0]; 465 sp[0] = sp[i]; 466 sp[i] = tmp; 467 } 468} 469