/* strfile.c revision 51864 */
12490Sjkh/*- 22490Sjkh * Copyright (c) 1989, 1993 32490Sjkh * The Regents of the University of California. All rights reserved. 42490Sjkh * 52490Sjkh * This code is derived from software contributed to Berkeley by 62490Sjkh * Ken Arnold. 72490Sjkh * 82490Sjkh * Redistribution and use in source and binary forms, with or without 92490Sjkh * modification, are permitted provided that the following conditions 102490Sjkh * are met: 112490Sjkh * 1. Redistributions of source code must retain the above copyright 122490Sjkh * notice, this list of conditions and the following disclaimer. 132490Sjkh * 2. Redistributions in binary form must reproduce the above copyright 142490Sjkh * notice, this list of conditions and the following disclaimer in the 152490Sjkh * documentation and/or other materials provided with the distribution. 162490Sjkh * 3. All advertising materials mentioning features or use of this software 172490Sjkh * must display the following acknowledgement: 182490Sjkh * This product includes software developed by the University of 192490Sjkh * California, Berkeley and its contributors. 202490Sjkh * 4. Neither the name of the University nor the names of its contributors 212490Sjkh * may be used to endorse or promote products derived from this software 222490Sjkh * without specific prior written permission. 232490Sjkh * 242490Sjkh * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 252490Sjkh * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 262490Sjkh * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 272490Sjkh * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 282490Sjkh * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 292490Sjkh * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 302490Sjkh * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 312490Sjkh * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 322490Sjkh * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 332490Sjkh * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 342490Sjkh * SUCH DAMAGE. 352490Sjkh */ 362490Sjkh 372490Sjkh#ifndef lint 3815944Sachestatic const char copyright[] = 392490Sjkh"@(#) Copyright (c) 1989, 1993\n\ 402490Sjkh The Regents of the University of California. All rights reserved.\n"; 412490Sjkh#endif /* not lint */ 422490Sjkh 432490Sjkh#ifndef lint 4451287Speter#if 0 4515944Sachestatic const char sccsid[] = "@(#)strfile.c 8.1 (Berkeley) 5/31/93"; 4651287Speter#else 4751287Speterstatic const char rcsid[] = 4851287Speter "$FreeBSD: head/games/fortune/strfile/strfile.c 51864 1999-10-02 12:33:37Z dcs $"; 4951287Speter#endif 502490Sjkh#endif /* not lint */ 512490Sjkh 522490Sjkh# include <sys/param.h> 532490Sjkh# include <stdio.h> 5415944Sache# include <stdlib.h> 552490Sjkh# include <ctype.h> 5615944Sache# include <string.h> 5715944Sache# include <time.h> 5815944Sache# include <locale.h> 5915944Sache# include <unistd.h> 602490Sjkh# include "strfile.h" 612490Sjkh 622490Sjkh/* 632490Sjkh * This program takes a file composed of strings seperated by 642490Sjkh * lines starting with two consecutive delimiting character (default 652490Sjkh * character is '%') and creates another file which consists of a table 662490Sjkh * describing the file (structure from "strfile.h"), a table of seek 672490Sjkh * pointers to the start of the strings, and the strings, each terminated 682490Sjkh * by a null byte. 
Usage: 692490Sjkh * 702490Sjkh * % strfile [-iorsx] [ -cC ] sourcefile [ datafile ] 712490Sjkh * 7251864Sdcs * C - Allow comments marked by a double delimiter at line's beginning 732490Sjkh * c - Change delimiting character from '%' to 'C' 742490Sjkh * s - Silent. Give no summary of data processed at the end of 752490Sjkh * the run. 762490Sjkh * o - order the strings in alphabetic order 778856Srgrimes * i - if ordering, ignore case 782490Sjkh * r - randomize the order of the strings 792490Sjkh * x - set rotated bit 802490Sjkh * 812490Sjkh * Ken Arnold Sept. 7, 1978 -- 822490Sjkh * 832490Sjkh * Added ordering options. 842490Sjkh */ 852490Sjkh 862490Sjkh# define TRUE 1 872490Sjkh# define FALSE 0 882490Sjkh 892490Sjkh# define STORING_PTRS (Oflag || Rflag) 902490Sjkh# define CHUNKSIZE 512 912490Sjkh 9215944Sache# define ALLOC(ptr,sz) { \ 932490Sjkh if (ptr == NULL) \ 942490Sjkh ptr = malloc((unsigned int) (CHUNKSIZE * sizeof *ptr)); \ 952490Sjkh else if (((sz) + 1) % CHUNKSIZE == 0) \ 962490Sjkh ptr = realloc((void *) ptr, ((unsigned int) ((sz) + CHUNKSIZE) * sizeof *ptr)); \ 972490Sjkh if (ptr == NULL) { \ 982490Sjkh fprintf(stderr, "out of space\n"); \ 992490Sjkh exit(1); \ 1002490Sjkh } \ 10115944Sache } 1022490Sjkh 1032490Sjkh#ifdef NO_VOID 1042490Sjkh# define void char 1052490Sjkh#endif 1062490Sjkh 1072490Sjkhtypedef struct { 1082490Sjkh char first; 10915944Sache long pos; 1102490Sjkh} STR; 1112490Sjkh 1122490Sjkhchar *Infile = NULL, /* input file name */ 1132490Sjkh Outfile[MAXPATHLEN] = "", /* output file name */ 1142490Sjkh Delimch = '%'; /* delimiting character */ 1152490Sjkh 11651864Sdcsint Cflag = FALSE; /* embedded comments */ 1172490Sjkhint Sflag = FALSE; /* silent run flag */ 1182490Sjkhint Oflag = FALSE; /* ordering flag */ 1192490Sjkhint Iflag = FALSE; /* ignore case flag */ 1202490Sjkhint Rflag = FALSE; /* randomize order flag */ 1212490Sjkhint Xflag = FALSE; /* set rotated bit */ 1222490Sjkhlong Num_pts = 0; /* number of pointers/strings */ 
1232490Sjkh 12415944Sachelong *Seekpts; 1252490Sjkh 1262490SjkhFILE *Sort_1, *Sort_2; /* pointers for sorting */ 1272490Sjkh 1282490SjkhSTRFILE Tbl; /* statistics table */ 1292490Sjkh 1302490SjkhSTR *Firstch; /* first chars of each string */ 1312490Sjkh 13215944Sachevoid getargs(), add_offset(), do_order(), randomize(), usage(); 13315944Sacheint cmp_str(); 1342490Sjkh 1352490Sjkh/* 1362490Sjkh * main: 1372490Sjkh * Drive the sucker. There are two main modes -- either we store 1382490Sjkh * the seek pointers, if the table is to be sorted or randomized, 1392490Sjkh * or we write the pointer directly to the file, if we are to stay 1402490Sjkh * in file order. If the former, we allocate and re-allocate in 1412490Sjkh * CHUNKSIZE blocks; if the latter, we just write each pointer, 1422490Sjkh * and then seek back to the beginning to write in the table. 1432490Sjkh */ 14451287Speterint main(ac, av) 1452490Sjkhint ac; 1462490Sjkhchar **av; 1472490Sjkh{ 1482490Sjkh register char *sp, dc; 1492490Sjkh register FILE *inf, *outf; 15015944Sache register long last_off, length, pos, *p; 1512490Sjkh register int first, cnt; 1522490Sjkh register char *nsp; 1532490Sjkh register STR *fp; 1542490Sjkh static char string[257]; 1552490Sjkh 15616140Sache (void) setlocale(LC_ALL, ""); 15715944Sache 1582490Sjkh getargs(ac, av); /* evalute arguments */ 1592490Sjkh dc = Delimch; 1602490Sjkh if ((inf = fopen(Infile, "r")) == NULL) { 1612490Sjkh perror(Infile); 1622490Sjkh exit(1); 1632490Sjkh } 1642490Sjkh 1652490Sjkh if ((outf = fopen(Outfile, "w")) == NULL) { 1662490Sjkh perror(Outfile); 1672490Sjkh exit(1); 1682490Sjkh } 1692490Sjkh if (!STORING_PTRS) 17015944Sache (void) fseek(outf, (long) sizeof Tbl, 0); 1712490Sjkh 1722490Sjkh /* 1732490Sjkh * Write the strings onto the file 1742490Sjkh */ 1752490Sjkh 1762490Sjkh Tbl.str_longlen = 0; 17715944Sache Tbl.str_shortlen = ~((unsigned long) 0); 1782490Sjkh Tbl.str_delim = dc; 1792490Sjkh Tbl.str_version = VERSION; 1802490Sjkh first = Oflag; 
1812490Sjkh add_offset(outf, ftell(inf)); 1822490Sjkh last_off = 0; 1832490Sjkh do { 1842490Sjkh sp = fgets(string, 256, inf); 18515944Sache if (sp == NULL || (sp[0] == dc && sp[1] == '\n')) { 1862490Sjkh pos = ftell(inf); 1872490Sjkh length = pos - last_off - (sp ? strlen(sp) : 0); 1882490Sjkh last_off = pos; 1892490Sjkh if (!length) 1902490Sjkh continue; 1912490Sjkh add_offset(outf, pos); 1922490Sjkh if (Tbl.str_longlen < length) 1932490Sjkh Tbl.str_longlen = length; 1942490Sjkh if (Tbl.str_shortlen > length) 1952490Sjkh Tbl.str_shortlen = length; 1962490Sjkh first = Oflag; 1972490Sjkh } 1982490Sjkh else if (first) { 19915944Sache for (nsp = sp; !isalnum((unsigned char)*nsp); nsp++) 2002490Sjkh continue; 2012490Sjkh ALLOC(Firstch, Num_pts); 2022490Sjkh fp = &Firstch[Num_pts - 1]; 20315944Sache if (Iflag && isupper((unsigned char)*nsp)) 20415944Sache fp->first = tolower((unsigned char)*nsp); 2052490Sjkh else 2062490Sjkh fp->first = *nsp; 2072490Sjkh fp->pos = Seekpts[Num_pts - 1]; 2082490Sjkh first = FALSE; 2092490Sjkh } 2102490Sjkh } while (sp != NULL); 2112490Sjkh 2122490Sjkh /* 2132490Sjkh * write the tables in 2142490Sjkh */ 2152490Sjkh 2162490Sjkh (void) fclose(inf); 21733633Ssteve Tbl.str_numstr = Num_pts - 1; 2182490Sjkh 21951864Sdcs if (Cflag) 22051864Sdcs Tbl.str_flags |= STR_COMMENTS; 22151864Sdcs 2222490Sjkh if (Oflag) 2232490Sjkh do_order(); 2242490Sjkh else if (Rflag) 2252490Sjkh randomize(); 2262490Sjkh 2272490Sjkh if (Xflag) 2282490Sjkh Tbl.str_flags |= STR_ROTATED; 2292490Sjkh 2302490Sjkh if (!Sflag) { 2312490Sjkh printf("\"%s\" created\n", Outfile); 2322490Sjkh if (Num_pts == 2) 2332490Sjkh puts("There was 1 string"); 2342490Sjkh else 23515944Sache printf("There were %ld strings\n", Num_pts - 1); 2362490Sjkh printf("Longest string: %lu byte%s\n", Tbl.str_longlen, 2372490Sjkh Tbl.str_longlen == 1 ? "" : "s"); 2382490Sjkh printf("Shortest string: %lu byte%s\n", Tbl.str_shortlen, 2392490Sjkh Tbl.str_shortlen == 1 ? 
"" : "s"); 2402490Sjkh } 2412490Sjkh 24215944Sache rewind(outf); 2432490Sjkh Tbl.str_version = htonl(Tbl.str_version); 24433633Ssteve Tbl.str_numstr = htonl(Tbl.str_numstr); 2452490Sjkh Tbl.str_longlen = htonl(Tbl.str_longlen); 2462490Sjkh Tbl.str_shortlen = htonl(Tbl.str_shortlen); 2472490Sjkh Tbl.str_flags = htonl(Tbl.str_flags); 2482490Sjkh (void) fwrite((char *) &Tbl, sizeof Tbl, 1, outf); 2492490Sjkh if (STORING_PTRS) { 2502490Sjkh for (p = Seekpts, cnt = Num_pts; cnt--; ++p) 2512490Sjkh *p = htonl(*p); 2522490Sjkh (void) fwrite((char *) Seekpts, sizeof *Seekpts, (int) Num_pts, outf); 2532490Sjkh } 2542490Sjkh (void) fclose(outf); 2552490Sjkh exit(0); 2562490Sjkh} 2572490Sjkh 2582490Sjkh/* 2592490Sjkh * This routine evaluates arguments from the command line 2602490Sjkh */ 26115944Sachevoid getargs(argc, argv) 2622490Sjkhint argc; 2632490Sjkhchar **argv; 2642490Sjkh{ 2652490Sjkh extern char *optarg; 2662490Sjkh extern int optind; 2672490Sjkh int ch; 2682490Sjkh 26951864Sdcs while ((ch = getopt(argc, argv, "Cc:iorsx")) != EOF) 2702490Sjkh switch(ch) { 27151864Sdcs case 'C': /* embedded comments */ 27251864Sdcs Cflag++; 27351864Sdcs break; 2742490Sjkh case 'c': /* new delimiting char */ 2752490Sjkh Delimch = *optarg; 2762490Sjkh if (!isascii(Delimch)) { 2772490Sjkh printf("bad delimiting character: '\\%o\n'", 27815944Sache (unsigned char)Delimch); 2792490Sjkh } 2802490Sjkh break; 2812490Sjkh case 'i': /* ignore case in ordering */ 2822490Sjkh Iflag++; 2832490Sjkh break; 2842490Sjkh case 'o': /* order strings */ 2852490Sjkh Oflag++; 2862490Sjkh break; 2872490Sjkh case 'r': /* randomize pointers */ 2882490Sjkh Rflag++; 2892490Sjkh break; 2902490Sjkh case 's': /* silent */ 2912490Sjkh Sflag++; 2922490Sjkh break; 2932490Sjkh case 'x': /* set the rotated bit */ 2942490Sjkh Xflag++; 2952490Sjkh break; 2962490Sjkh case '?': 2972490Sjkh default: 2982490Sjkh usage(); 2992490Sjkh } 3002490Sjkh argv += optind; 3012490Sjkh 3022490Sjkh if (*argv) { 3032490Sjkh Infile = *argv; 
3042490Sjkh if (*++argv) 3052490Sjkh (void) strcpy(Outfile, *argv); 3062490Sjkh } 3072490Sjkh if (!Infile) { 3082490Sjkh puts("No input file name"); 3092490Sjkh usage(); 3102490Sjkh } 3112490Sjkh if (*Outfile == '\0') { 3122490Sjkh (void) strcpy(Outfile, Infile); 3132490Sjkh (void) strcat(Outfile, ".dat"); 3142490Sjkh } 3152490Sjkh} 3162490Sjkh 31715944Sachevoid usage() 3182490Sjkh{ 3192490Sjkh (void) fprintf(stderr, 3202490Sjkh "strfile [-iorsx] [-c char] sourcefile [datafile]\n"); 3212490Sjkh exit(1); 3222490Sjkh} 3232490Sjkh 3242490Sjkh/* 3252490Sjkh * add_offset: 3262490Sjkh * Add an offset to the list, or write it out, as appropriate. 3272490Sjkh */ 32815944Sachevoid add_offset(fp, off) 3292490SjkhFILE *fp; 33015944Sachelong off; 3312490Sjkh{ 33215944Sache long net; 3332490Sjkh 3342490Sjkh if (!STORING_PTRS) { 3352490Sjkh net = htonl(off); 3362490Sjkh fwrite(&net, 1, sizeof net, fp); 3372490Sjkh } else { 3382490Sjkh ALLOC(Seekpts, Num_pts + 1); 3392490Sjkh Seekpts[Num_pts] = off; 3402490Sjkh } 3412490Sjkh Num_pts++; 3422490Sjkh} 3432490Sjkh 3442490Sjkh/* 3452490Sjkh * do_order: 3462490Sjkh * Order the strings alphabetically (possibly ignoring case). 
3472490Sjkh */ 34815944Sachevoid do_order() 3492490Sjkh{ 3502490Sjkh register int i; 35115944Sache register long *lp; 3522490Sjkh register STR *fp; 3532490Sjkh 3542490Sjkh Sort_1 = fopen(Infile, "r"); 3552490Sjkh Sort_2 = fopen(Infile, "r"); 3562490Sjkh qsort((char *) Firstch, (int) Tbl.str_numstr, sizeof *Firstch, cmp_str); 3572490Sjkh i = Tbl.str_numstr; 3582490Sjkh lp = Seekpts; 3592490Sjkh fp = Firstch; 3602490Sjkh while (i--) 3612490Sjkh *lp++ = fp++->pos; 3622490Sjkh (void) fclose(Sort_1); 3632490Sjkh (void) fclose(Sort_2); 3642490Sjkh Tbl.str_flags |= STR_ORDERED; 3652490Sjkh} 3662490Sjkh 36719291Sachestatic int collate_range_cmp (c1, c2) 36819288Sache int c1, c2; 36919288Sache{ 37019288Sache static char s1[2], s2[2]; 37119288Sache int ret; 37219288Sache 37319288Sache c1 &= UCHAR_MAX; 37419288Sache c2 &= UCHAR_MAX; 37519288Sache if (c1 == c2) 37619288Sache return (0); 37719288Sache s1[0] = c1; 37819288Sache s2[0] = c2; 37919288Sache if ((ret = strcoll(s1, s2)) != 0) 38019288Sache return (ret); 38119288Sache return (c1 - c2); 38219288Sache} 38319288Sache 3842490Sjkh/* 3852490Sjkh * cmp_str: 3862490Sjkh * Compare two strings in the file 3872490Sjkh */ 38815944Sacheint cmp_str(p1, p2) 3892490SjkhSTR *p1, *p2; 3902490Sjkh{ 3912490Sjkh register int c1, c2; 3922490Sjkh register int n1, n2; 39315944Sache int r; 3942490Sjkh 3952490Sjkh# define SET_N(nf,ch) (nf = (ch == '\n')) 39615944Sache# define IS_END(ch,nf) (ch == EOF || (ch == (unsigned char) Delimch && nf)) 3972490Sjkh 39816262Sache c1 = (unsigned char) p1->first; 39916262Sache c2 = (unsigned char) p2->first; 40017555Sache if ((r = collate_range_cmp(c1, c2)) != 0) 40115944Sache return r; 4022490Sjkh 4032490Sjkh (void) fseek(Sort_1, p1->pos, 0); 4042490Sjkh (void) fseek(Sort_2, p2->pos, 0); 4052490Sjkh 4062490Sjkh n1 = FALSE; 4072490Sjkh n2 = FALSE; 40815944Sache while (!isalnum(c1 = getc(Sort_1)) && c1 != '\0' && c1 != EOF) 4092490Sjkh SET_N(n1, c1); 41015944Sache while (!isalnum(c2 = getc(Sort_2)) && c2 != 
'\0' && c2 != EOF) 4112490Sjkh SET_N(n2, c2); 4122490Sjkh 4132490Sjkh while (!IS_END(c1, n1) && !IS_END(c2, n2)) { 4142490Sjkh if (Iflag) { 4152490Sjkh if (isupper(c1)) 4162490Sjkh c1 = tolower(c1); 4172490Sjkh if (isupper(c2)) 4182490Sjkh c2 = tolower(c2); 4192490Sjkh } 42017555Sache if ((r = collate_range_cmp(c1, c2)) != 0) 42115944Sache return r; 4222490Sjkh SET_N(n1, c1); 4232490Sjkh SET_N(n2, c2); 4242490Sjkh c1 = getc(Sort_1); 4252490Sjkh c2 = getc(Sort_2); 4262490Sjkh } 4272490Sjkh if (IS_END(c1, n1)) 4282490Sjkh c1 = 0; 4292490Sjkh if (IS_END(c2, n2)) 4302490Sjkh c2 = 0; 43117555Sache return collate_range_cmp(c1, c2); 4322490Sjkh} 4332490Sjkh 4342490Sjkh/* 4352490Sjkh * randomize: 4362490Sjkh * Randomize the order of the string table. We must be careful 4372490Sjkh * not to randomize across delimiter boundaries. All 4382490Sjkh * randomization is done within each block. 4392490Sjkh */ 44015944Sachevoid randomize() 4412490Sjkh{ 4422490Sjkh register int cnt, i; 44315944Sache register long tmp; 44415944Sache register long *sp; 4452490Sjkh 44629774Sache srandomdev(); 4472490Sjkh 4482490Sjkh Tbl.str_flags |= STR_RANDOM; 4492490Sjkh cnt = Tbl.str_numstr; 4502490Sjkh 4512490Sjkh /* 4522490Sjkh * move things around randomly 4532490Sjkh */ 4542490Sjkh 4552490Sjkh for (sp = Seekpts; cnt > 0; cnt--, sp++) { 4562490Sjkh i = random() % cnt; 4572490Sjkh tmp = sp[0]; 4582490Sjkh sp[0] = sp[i]; 4592490Sjkh sp[i] = tmp; 4602490Sjkh } 4612490Sjkh} 462