strfile.c revision 142022
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Ken Arnold. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#if 0 38#ifndef lint 39static const char copyright[] = 40"@(#) Copyright (c) 1989, 1993\n\ 41 The Regents of the University of California. All rights reserved.\n"; 42#endif /* not lint */ 43 44#ifndef lint 45static const char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; 46#endif /* not lint */ 47#endif 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: head/games/fortune/strfile/strfile.c 142022 2005-02-17 18:06:37Z ru $"); 50 51# include <sys/param.h> 52# include <sys/endian.h> 53# include <stdio.h> 54# include <stdlib.h> 55# include <ctype.h> 56# include <string.h> 57# include <time.h> 58# include <locale.h> 59# include <unistd.h> 60# include "strfile.h" 61 62/* 63 * This program takes a file composed of strings separated by 64 * lines starting with two consecutive delimiting character (default 65 * character is '%') and creates another file which consists of a table 66 * describing the file (structure from "strfile.h"), a table of seek 67 * pointers to the start of the strings, and the strings, each terminated 68 * by a null byte. Usage: 69 * 70 * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 71 * 72 * C - Allow comments marked by a double delimiter at line's beginning 73 * c - Change delimiting character from '%' to 'C' 74 * s - Silent. Give no summary of data processed at the end of 75 * the run. 76 * o - order the strings in alphabetic order 77 * i - if ordering, ignore case 78 * r - randomize the order of the strings 79 * x - set rotated bit 80 * 81 * Ken Arnold Sept. 7, 1978 -- 82 * 83 * Added ordering options. 84 */ 85 86# define TRUE 1 87# define FALSE 0 88 89# define STORING_PTRS (Oflag || Rflag) 90# define CHUNKSIZE 512 91 92# define ALLOC(ptr,sz) { \ 93 if (ptr == NULL) \ 94 ptr = malloc(CHUNKSIZE * sizeof *ptr); \ 95 else if (((sz) + 1) % CHUNKSIZE == 0) \ 96 ptr = realloc(ptr, ((sz) + CHUNKSIZE) * sizeof *ptr); \ 97 if (ptr == NULL) { \ 98 fprintf(stderr, "out of space\n"); \ 99 exit(1); \ 100 } \ 101 } 102 103#ifdef NO_VOID 104# define void char 105#endif 106 107typedef struct { 108 int first; 109 off_t pos; 110} STR; 111 112static char *Infile = NULL, /* input file name */ 113 Outfile[MAXPATHLEN] = "", /* output file name */ 114 Delimch = '%'; /* delimiting character */ 115 116static int Cflag = FALSE; /* embedded comments */ 117static int Sflag = FALSE; /* silent run flag */ 118static int Oflag = FALSE; /* ordering flag */ 119static int Iflag = FALSE; /* ignore case flag */ 120static int Rflag = FALSE; /* randomize order flag */ 121static int Xflag = FALSE; /* set rotated bit */ 122static uint32_t Num_pts = 0; /* number of pointers/strings */ 123 124static off_t *Seekpts; 125 126static FILE *Sort_1, *Sort_2; /* pointers for sorting */ 127 128static STRFILE Tbl; /* statistics table */ 129 130static STR *Firstch; /* first chars of each string */ 131 132static void add_offset(FILE *, off_t); 133static int cmp_str(const void *, const void *); 134static int stable_collate_range_cmp(int, int); 135static void do_order(void); 136static void getargs(int, char **); 137static void randomize(void); 138static void usage(void); 139 140/* 141 * main: 142 * Drive the sucker. There are two main modes -- either we store 143 * the seek pointers, if the table is to be sorted or randomized, 144 * or we write the pointer directly to the file, if we are to stay 145 * in file order. If the former, we allocate and re-allocate in 146 * CHUNKSIZE blocks; if the latter, we just write each pointer, 147 * and then seek back to the beginning to write in the table. 148 */ 149int main(int ac, char *av[]) 150{ 151 char *sp, dc; 152 FILE *inf, *outf; 153 off_t last_off, pos, *p; 154 size_t length; 155 int first; 156 uint32_t cnt; 157 char *nsp; 158 STR *fp; 159 static char string[257]; 160 161 (void) setlocale(LC_ALL, ""); 162 163 getargs(ac, av); /* evalute arguments */ 164 dc = Delimch; 165 if ((inf = fopen(Infile, "r")) == NULL) { 166 perror(Infile); 167 exit(1); 168 } 169 170 if ((outf = fopen(Outfile, "w")) == NULL) { 171 perror(Outfile); 172 exit(1); 173 } 174 if (!STORING_PTRS) 175 (void) fseek(outf, (long) sizeof Tbl, 0); 176 177 /* 178 * Write the strings onto the file 179 */ 180 181 Tbl.str_longlen = 0; 182 Tbl.str_shortlen = 0xffffffff; 183 Tbl.str_delim = dc; 184 Tbl.str_version = VERSION; 185 first = Oflag; 186 add_offset(outf, ftello(inf)); 187 last_off = 0; 188 do { 189 sp = fgets(string, 256, inf); 190 if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 191 pos = ftello(inf); 192 length = (size_t)(pos - last_off) - 193 (sp != NULL ? strlen(sp) : 0); 194 last_off = pos; 195 if (length == 0) 196 continue; 197 add_offset(outf, pos); 198 if ((size_t)Tbl.str_longlen < length) 199 Tbl.str_longlen = length; 200 if ((size_t)Tbl.str_shortlen > length) 201 Tbl.str_shortlen = length; 202 first = Oflag; 203 } 204 else if (first) { 205 for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 206 continue; 207 ALLOC(Firstch, Num_pts); 208 fp = &Firstch[Num_pts - 1]; 209 if (Iflag && isupper((unsigned char)*nsp)) 210 fp->first = tolower((unsigned char)*nsp); 211 else 212 fp->first = *nsp; 213 fp->pos = Seekpts[Num_pts - 1]; 214 first = FALSE; 215 } 216 } while (sp != NULL); 217 218 /* 219 * write the tables in 220 */ 221 222 (void) fclose(inf); 223 Tbl.str_numstr = Num_pts - 1; 224 225 if (Cflag) 226 Tbl.str_flags |= STR_COMMENTS; 227 228 if (Oflag) 229 do_order(); 230 else if (Rflag) 231 randomize(); 232 233 if (Xflag) 234 Tbl.str_flags |= STR_ROTATED; 235 236 if (!Sflag) { 237 printf("\"%s\" created\n", Outfile); 238 if (Num_pts == 2) 239 puts("There was 1 string"); 240 else 241 printf("There were %u strings\n", Num_pts - 1); 242 printf("Longest string: %u byte%s\n", Tbl.str_longlen, 243 Tbl.str_longlen == 1 ? "" : "s"); 244 printf("Shortest string: %u byte%s\n", Tbl.str_shortlen, 245 Tbl.str_shortlen == 1 ? "" : "s"); 246 } 247 248 rewind(outf); 249 Tbl.str_version = htobe32(Tbl.str_version); 250 Tbl.str_numstr = htobe32(Tbl.str_numstr); 251 Tbl.str_longlen = htobe32(Tbl.str_longlen); 252 Tbl.str_shortlen = htobe32(Tbl.str_shortlen); 253 Tbl.str_flags = htobe32(Tbl.str_flags); 254 (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); 255 if (STORING_PTRS) { 256 for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 257 *p = htobe64(*p); 258 (void) fwrite(Seekpts, sizeof *Seekpts, (size_t) Num_pts, outf); 259 } 260 (void) fclose(outf); 261 exit(0); 262} 263 264/* 265 * This routine evaluates arguments from the command line 266 */ 267void getargs(argc, argv) 268int argc; 269char **argv; 270{ 271 int ch; 272 273 while ((ch = getopt(argc, argv, "Cc:iorsx")) != EOF) 274 switch(ch) { 275 case 'C': /* embedded comments */ 276 Cflag++; 277 break; 278 case 'c': /* new delimiting char */ 279 Delimch = *optarg; 280 if (!isascii(Delimch)) { 281 printf("bad delimiting character: '\\%o\n'", 282 (unsigned char)Delimch); 283 } 284 break; 285 case 'i': /* ignore case in ordering */ 286 Iflag++; 287 break; 288 case 'o': /* order strings */ 289 Oflag++; 290 break; 291 case 'r': /* randomize pointers */ 292 Rflag++; 293 break; 294 case 's': /* silent */ 295 Sflag++; 296 break; 297 case 'x': /* set the rotated bit */ 298 Xflag++; 299 break; 300 case '?': 301 default: 302 usage(); 303 } 304 argv += optind; 305 306 if (*argv) { 307 Infile = *argv; 308 if (*++argv) 309 (void) strcpy(Outfile, *argv); 310 } 311 if (!Infile) { 312 puts("No input file name"); 313 usage(); 314 } 315 if (*Outfile == '\0') { 316 (void) strcpy(Outfile, Infile); 317 (void) strcat(Outfile, ".dat"); 318 } 319} 320 321void usage() 322{ 323 (void) fprintf(stderr, 324 "strfile [-Ciorsx] [-c char] source_file [output_file]\n"); 325 exit(1); 326} 327 328/* 329 * add_offset: 330 * Add an offset to the list, or write it out, as appropriate. 331 */ 332void add_offset(fp, off) 333FILE *fp; 334off_t off; 335{ 336 off_t beoff; 337 338 if (!STORING_PTRS) { 339 beoff = htobe64(off); 340 fwrite(&beoff, 1, sizeof beoff, fp); 341 } else { 342 ALLOC(Seekpts, Num_pts + 1); 343 Seekpts[Num_pts] = off; 344 } 345 Num_pts++; 346} 347 348/* 349 * do_order: 350 * Order the strings alphabetically (possibly ignoring case). 351 */ 352void do_order() 353{ 354 uint32_t i; 355 off_t *lp; 356 STR *fp; 357 358 Sort_1 = fopen(Infile, "r"); 359 Sort_2 = fopen(Infile, "r"); 360 qsort(Firstch, (size_t) Tbl.str_numstr, sizeof *Firstch, cmp_str); 361 i = Tbl.str_numstr; 362 lp = Seekpts; 363 fp = Firstch; 364 while (i--) 365 *lp++ = fp++->pos; 366 (void) fclose(Sort_1); 367 (void) fclose(Sort_2); 368 Tbl.str_flags |= STR_ORDERED; 369} 370 371static int stable_collate_range_cmp(c1, c2) 372 int c1, c2; 373{ 374 static char s1[2], s2[2]; 375 int ret; 376 377 s1[0] = c1; 378 s2[0] = c2; 379 if ((ret = strcoll(s1, s2)) != 0) 380 return (ret); 381 return (c1 - c2); 382} 383 384/* 385 * cmp_str: 386 * Compare two strings in the file 387 */ 388int cmp_str(s1, s2) 389const void *s1, *s2; 390{ 391 const STR *p1, *p2; 392 int c1, c2; 393 int n1, n2; 394 int r; 395 396# define SET_N(nf,ch) (nf = (ch == '\n')) 397# define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) 398 399 p1 = (const STR *) s1; 400 p2 = (const STR *) s2; 401 402 c1 = (unsigned char) p1->first; 403 c2 = (unsigned char) p2->first; 404 if ((r = stable_collate_range_cmp(c1, c2)) != 0) 405 return (r); 406 407 (void) fseeko(Sort_1, p1->pos, 0); 408 (void) fseeko(Sort_2, p2->pos, 0); 409 410 n1 = FALSE; 411 n2 = FALSE; 412 while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) 413 SET_N(n1, c1); 414 while (!isalnum(c2 = getc(Sort_2)) && c2 != '\0' && c2 != EOF) 415 SET_N(n2, c2); 416 417 while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 418 if (Iflag) { 419 if (isupper(c1)) 420 c1 = tolower(c1); 421 if (isupper(c2)) 422 c2 = tolower(c2); 423 } 424 if ((r = stable_collate_range_cmp(c1, c2)) != 0) 425 return (r); 426 SET_N(n1, c1); 427 SET_N(n2, c2); 428 c1 = getc(Sort_1); 429 c2 = getc(Sort_2); 430 } 431 if (IS_END(c1, n1)) 432 c1 = 0; 433 if (IS_END(c2, n2)) 434 c2 = 0; 435 return (stable_collate_range_cmp(c1, c2)); 436} 437 438/* 439 * randomize: 440 * Randomize the order of the string table. We must be careful 441 * not to randomize across delimiter boundaries. All 442 * randomization is done within each block. 443 */ 444void randomize() 445{ 446 uint32_t cnt, i; 447 off_t tmp; 448 off_t *sp; 449 450 srandomdev(); 451 452 Tbl.str_flags |= STR_RANDOM; 453 cnt = Tbl.str_numstr; 454 455 /* 456 * move things around randomly 457 */ 458 459 for (sp = Seekpts; cnt > 0; cnt--, sp++) { 460 i = random() % cnt; 461 tmp = sp[0]; 462 sp[0] = sp[i]; 463 sp[i] = tmp; 464 } 465} 466