strfile.c revision 201175
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Ken Arnold. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#if 0 38#ifndef lint 39static const char copyright[] = 40"@(#) Copyright (c) 1989, 1993\n\ 41 The Regents of the University of California. All rights reserved.\n"; 42#endif /* not lint */ 43 44#ifndef lint 45static const char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; 46#endif /* not lint */ 47#endif 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: head/games/fortune/strfile/strfile.c 201175 2009-12-29 08:42:58Z ed $"); 50 51# include <sys/param.h> 52# include <sys/endian.h> 53# include <stdio.h> 54# include <stdlib.h> 55# include <ctype.h> 56# include <string.h> 57# include <time.h> 58# include <locale.h> 59# include <unistd.h> 60# include "strfile.h" 61 62/* 63 * This program takes a file composed of strings separated by 64 * lines starting with two consecutive delimiting character (default 65 * character is '%') and creates another file which consists of a table 66 * describing the file (structure from "strfile.h"), a table of seek 67 * pointers to the start of the strings, and the strings, each terminated 68 * by a null byte. Usage: 69 * 70 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 71 * 72 * C - Allow comments marked by a double delimiter at line's beginning 73 * c - Change delimiting character from '%' to 'C' 74 * s - Silent. Give no summary of data processed at the end of 75 * the run. 76 * o - order the strings in alphabetic order 77 * i - if ordering, ignore case 78 * r - randomize the order of the strings 79 * x - set rotated bit 80 * 81 * Ken Arnold Sept. 7, 1978 -- 82 * 83 * Added ordering options. 84 */ 85 86# define TRUE 1 87# define FALSE 0 88 89# define STORING_PTRS (Oflag || Rflag) 90# define CHUNKSIZE 512 91 92# define ALLOC(ptr,sz) { \ 93 if (ptr == NULL) \ 94 ptr = malloc(CHUNKSIZE * sizeof *ptr); \ 95 else if (((sz) + 1) % CHUNKSIZE == 0) \ 96 ptr = realloc(ptr, ((sz) + CHUNKSIZE) * sizeof *ptr); \ 97 if (ptr == NULL) { \ 98 fprintf(stderr, "out of space\n"); \ 99 exit(1); \ 100 } \ 101 } 102 103#ifdef NO_VOID 104# define void char 105#endif 106 107typedef struct { 108 int first; 109 off_t pos; 110} STR; 111 112static char *Infile = NULL, /* input file name */ 113 Outfile[MAXPATHLEN] = "", /* output file name */ 114 Delimch = '%'; /* delimiting character */ 115 116static int Cflag = FALSE; /* embedded comments */ 117static int Sflag = FALSE; /* silent run flag */ 118static int Oflag = FALSE; /* ordering flag */ 119static int Iflag = FALSE; /* ignore case flag */ 120static int Rflag = FALSE; /* randomize order flag */ 121static int Xflag = FALSE; /* set rotated bit */ 122static uint32_t Num_pts = 0; /* number of pointers/strings */ 123 124static off_t *Seekpts; 125 126static FILE *Sort_1, *Sort_2; /* pointers for sorting */ 127 128static STRFILE Tbl; /* statistics table */ 129 130static STR *Firstch; /* first chars of each string */ 131 132static void add_offset(FILE *, off_t); 133static int cmp_str(const void *, const void *); 134static int stable_collate_range_cmp(int, int); 135static void do_order(void); 136static void getargs(int, char **); 137static void randomize(void); 138static void usage(void); 139 140/* 141 * main: 142 * Drive the sucker. There are two main modes -- either we store 143 * the seek pointers, if the table is to be sorted or randomized, 144 * or we write the pointer directly to the file, if we are to stay 145 * in file order. If the former, we allocate and re-allocate in 146 * CHUNKSIZE blocks; if the latter, we just write each pointer, 147 * and then seek back to the beginning to write in the table. 148 */ 149int 150main(int ac, char *av[]) 151{ 152 char *sp, dc; 153 FILE *inf, *outf; 154 off_t last_off, pos, *p; 155 size_t length; 156 int first; 157 uint32_t cnt; 158 char *nsp; 159 STR *fp; 160 static char string[257]; 161 162 (void) setlocale(LC_ALL, ""); 163 164 getargs(ac, av); /* evalute arguments */ 165 dc = Delimch; 166 if ((inf = fopen(Infile, "r")) == NULL) { 167 perror(Infile); 168 exit(1); 169 } 170 171 if ((outf = fopen(Outfile, "w")) == NULL) { 172 perror(Outfile); 173 exit(1); 174 } 175 if (!STORING_PTRS) 176 (void) fseek(outf, (long) sizeof Tbl, 0); 177 178 /* 179 * Write the strings onto the file 180 */ 181 182 Tbl.str_longlen = 0; 183 Tbl.str_shortlen = 0xffffffff; 184 Tbl.str_delim = dc; 185 Tbl.str_version = VERSION; 186 first = Oflag; 187 add_offset(outf, ftello(inf)); 188 last_off = 0; 189 do { 190 sp = fgets(string, 256, inf); 191 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 192 pos = ftello(inf); 193 length = (size_t)(pos - last_off) - 194 (sp != NULL ? strlen(sp) : 0); 195 last_off = pos; 196 if (length == 0) 197 continue; 198 add_offset(outf, pos); 199 if ((size_t)Tbl.str_longlen < length) 200 Tbl.str_longlen = length; 201 if ((size_t)Tbl.str_shortlen > length) 202 Tbl.str_shortlen = length; 203 first = Oflag; 204 } 205 else if (first) { 206 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 207 continue; 208 ALLOC(Firstch, Num_pts); 209 fp = &Firstch[Num_pts - 1]; 210 if (Iflag && isupper((unsigned char)*nsp)) 211 fp->first = tolower((unsigned char)*nsp); 212 else 213 fp->first = *nsp; 214 fp->pos = Seekpts[Num_pts - 1]; 215 first = FALSE; 216 } 217 } while (sp != NULL); 218 219 /* 220 * write the tables in 221 */ 222 223 (void) fclose(inf); 224 Tbl.str_numstr = Num_pts - 1; 225 226 if (Cflag) 227 Tbl.str_flags |= STR_COMMENTS; 228 229 if (Oflag) 230 do_order(); 231 else if (Rflag) 232 randomize(); 233 234 if (Xflag) 235 Tbl.str_flags |= STR_ROTATED; 236 237 if (!Sflag) { 238 printf("\"%s\" created\n", Outfile); 239 if (Num_pts == 2) 240 puts("There was 1 string"); 241 else 242 printf("There were %u strings\n", Num_pts - 1); 243 printf("Longest string: %u byte%s\n", Tbl.str_longlen, 244 Tbl.str_longlen == 1 ? "" : "s"); 245 printf("Shortest string: %u byte%s\n", Tbl.str_shortlen, 246 Tbl.str_shortlen == 1 ? "" : "s"); 247 } 248 249 rewind(outf); 250 Tbl.str_version = htobe32(Tbl.str_version); 251 Tbl.str_numstr = htobe32(Tbl.str_numstr); 252 Tbl.str_longlen = htobe32(Tbl.str_longlen); 253 Tbl.str_shortlen = htobe32(Tbl.str_shortlen); 254 Tbl.str_flags = htobe32(Tbl.str_flags); 255 (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); 256 if (STORING_PTRS) { 257 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 258 *p = htobe64(*p); 259 (void) fwrite(Seekpts, sizeof *Seekpts, (size_t) Num_pts, outf); 260 } 261 (void) fclose(outf); 262 exit(0); 263} 264 265/* 266 * This routine evaluates arguments from the command line 267 */ 268void 269getargs(int argc, char **argv) 270{ 271 int ch; 272 273 while ((ch = getopt(argc, argv, "Cc:iorsx")) != -1) 274 switch(ch) { 275 case 'C': /* embedded comments */ 276 Cflag++; 277 break; 278 case 'c': /* new delimiting char */ 279 Delimch = *optarg; 280 if (!isascii(Delimch)) { 281 printf("bad delimiting character: '\\%o\n'", 282 (unsigned char)Delimch); 283 } 284 break; 285 case 'i': /* ignore case in ordering */ 286 Iflag++; 287 break; 288 case 'o': /* order strings */ 289 Oflag++; 290 break; 291 case 'r': /* randomize pointers */ 292 Rflag++; 293 break; 294 case 's': /* silent */ 295 Sflag++; 296 break; 297 case 'x': /* set the rotated bit */ 298 Xflag++; 299 break; 300 case '?': 301 default: 302 usage(); 303 } 304 argv += optind; 305 306 if (*argv) { 307 Infile = *argv; 308 if (*++argv) 309 (void) strcpy(Outfile, *argv); 310 } 311 if (!Infile) { 312 puts("No input file name"); 313 usage(); 314 } 315 if (*Outfile == '\0') { 316 (void) strcpy(Outfile, Infile); 317 (void) strcat(Outfile, ".dat"); 318 } 319} 320 321void 322usage(void) 323{ 324 (void) fprintf(stderr, 325 "strfile [-Ciorsx] [-c char] source_file [output_file]\n"); 326 exit(1); 327} 328 329/* 330 * add_offset: 331 * Add an offset to the list, or write it out, as appropriate. 332 */ 333void 334add_offset(FILE *fp, off_t off) 335{ 336 off_t beoff; 337 338 if (!STORING_PTRS) { 339 beoff = htobe64(off); 340 fwrite(&beoff, 1, sizeof beoff, fp); 341 } else { 342 ALLOC(Seekpts, Num_pts + 1); 343 Seekpts[Num_pts] = off; 344 } 345 Num_pts++; 346} 347 348/* 349 * do_order: 350 * Order the strings alphabetically (possibly ignoring case). 351 */ 352void 353do_order(void) 354{ 355 uint32_t i; 356 off_t *lp; 357 STR *fp; 358 359 Sort_1 = fopen(Infile, "r"); 360 Sort_2 = fopen(Infile, "r"); 361 qsort(Firstch, (size_t) Tbl.str_numstr, sizeof *Firstch, cmp_str); 362 i = Tbl.str_numstr; 363 lp = Seekpts; 364 fp = Firstch; 365 while (i--) 366 *lp++ = fp++->pos; 367 (void) fclose(Sort_1); 368 (void) fclose(Sort_2); 369 Tbl.str_flags |= STR_ORDERED; 370} 371 372static int 373stable_collate_range_cmp(int c1, int c2) 374{ 375 static char s1[2], s2[2]; 376 int ret; 377 378 s1[0] = c1; 379 s2[0] = c2; 380 if ((ret = strcoll(s1, s2)) != 0) 381 return (ret); 382 return (c1 - c2); 383} 384 385/* 386 * cmp_str: 387 * Compare two strings in the file 388 */ 389int 390cmp_str(const void *s1, const void *s2) 391{ 392 const STR *p1, *p2; 393 int c1, c2; 394 int n1, n2; 395 int r; 396 397# define SET_N(nf,ch) (nf = (ch == '\n')) 398# define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) 399 400 p1 = (const STR *) s1; 401 p2 = (const STR *) s2; 402 403 c1 = (unsigned char) p1->first; 404 c2 = (unsigned char) p2->first; 405 if ((r = stable_collate_range_cmp(c1, c2)) != 0) 406 return (r); 407 408 (void) fseeko(Sort_1, p1->pos, 0); 409 (void) fseeko(Sort_2, p2->pos, 0); 410 411 n1 = FALSE; 412 n2 = FALSE; 413 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) 414 SET_N(n1, c1); 415 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) 416 SET_N(n2, c2); 417 418 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 419 if (Iflag) { 420 if (isupper(c1)) 421 c1 = tolower(c1); 422 if (isupper(c2)) 423 c2 = tolower(c2); 424 } 425 if ((r = stable_collate_range_cmp(c1, c2)) != 0) 426 return (r); 427 SET_N(n1, c1); 428 SET_N(n2, c2); 429 c1 = getc(Sort_1); 430 c2 = getc(Sort_2); 431 } 432 if (IS_END(c1, n1)) 433 c1 = 0; 434 if (IS_END(c2, n2)) 435 c2 = 0; 436 return (stable_collate_range_cmp(c1, c2)); 437} 438 439/* 440 * randomize: 441 * Randomize the order of the string table. We must be careful 442 * not to randomize across delimiter boundaries. All 443 * randomization is done within each block. 444 */ 445void 446randomize(void) 447{ 448 uint32_t cnt, i; 449 off_t tmp; 450 off_t *sp; 451 452#if __FreeBSD_version < 800041 453 srandomdev(); 454#endif 455 456 Tbl.str_flags |= STR_RANDOM; 457 cnt = Tbl.str_numstr; 458 459 /* 460 * move things around randomly 461 */ 462 463 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 464#if __FreeBSD_version < 800041 465 i = random() % cnt; 466#else 467 i = arc4random_uniform(cnt); 468#endif 469 tmp = sp[0]; 470 sp[0] = sp[i]; 471 sp[i] = tmp; 472 } 473} 474