1/* $NetBSD: texindex.c,v 1.1.1.7 2008/09/02 07:50:59 christos Exp $ */ 2 3/* texindex -- sort TeX index dribble output into an actual index. 4 Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp 5 6 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001, 7 2002, 2003, 2004 Free Software Foundation, Inc. 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 2, or (at your option) 12 any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ 22 23#include "system.h" 24#include <getopt.h> 25 26static char *program_name = "texindex"; 27 28#if defined (emacs) 29# include "../src/config.h" 30/* Some s/os.h files redefine these. */ 31# undef read 32# undef close 33# undef write 34# undef open 35#endif 36 37#if !defined (HAVE_MEMSET) 38#undef memset 39#define memset(ptr, ignore, count) bzero (ptr, count) 40#endif 41 42#if !defined (SEEK_SET) 43# define SEEK_SET 0 44# define SEEK_CUR 1 45# define SEEK_END 2 46#endif /* !SEEK_SET */ 47 48/* When sorting in core, this structure describes one line 49 and the position and length of its first keyfield. */ 50struct lineinfo 51{ 52 char *text; /* The actual text of the line. */ 53 union { 54 char *text; /* The start of the key (for textual comparison). */ 55 long number; /* The numeric value (for numeric comparison). */ 56 } key; 57 long keylen; /* Length of KEY field. */ 58}; 59 60/* This structure describes a field to use as a sort key. */ 61struct keyfield 62{ 63 int startwords; /* Number of words to skip. */ 64 int startchars; /* Number of additional chars to skip. */ 65 int endwords; /* Number of words to ignore at end. */ 66 int endchars; /* Ditto for characters of last word. */ 67 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ 68 char fold_case; /* Non-zero means case doesn't matter. */ 69 char reverse; /* Non-zero means compare in reverse order. */ 70 char numeric; /* Non-zeros means field is ASCII numeric. */ 71 char positional; /* Sort according to file position. */ 72 char braced; /* Count balanced-braced groupings as fields. */ 73}; 74 75/* Vector of keyfields to use. */ 76struct keyfield keyfields[3]; 77 78/* Number of keyfields stored in that vector. */ 79int num_keyfields = 3; 80 81/* Vector of input file names, terminated with a null pointer. */ 82char **infiles; 83 84/* Vector of corresponding output file names, or NULL, meaning default it 85 (add an `s' to the end). */ 86char **outfiles; 87 88/* Length of `infiles'. */ 89int num_infiles; 90 91/* Pointer to the array of pointers to lines being sorted. */ 92char **linearray; 93 94/* The allocated length of `linearray'. */ 95long nlines; 96 97/* During in-core sort, this points to the base of the data block 98 which contains all the lines of data. */ 99char *text_base; 100 101/* Initially 0; changed to 1 if we want initials in this index. */ 102int need_initials; 103 104/* Remembers the first initial letter seen in this index, so we can 105 determine whether we need initials in the sorted form. */ 106char first_initial; 107 108/* Forward declarations of functions in this file. */ 109void decode_command (int argc, char **argv); 110void sort_in_core (char *infile, int total, char *outfile); 111char **parsefile (char *filename, char **nextline, char *data, long int size); 112char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr); 113char *find_pos (char *str, int words, int chars, int ignore_blanks); 114long find_value (char *start, long int length); 115char *find_braced_pos (char *str, int words, int chars, int ignore_blanks); 116char *find_braced_end (char *str); 117void writelines (char **linearray, int nlines, FILE *ostream); 118int compare_field (struct keyfield *keyfield, char *start1, 119 long int length1, long int pos1, char *start2, 120 long int length2, long int pos2); 121int compare_full (const void *, const void *); 122void pfatal_with_name (const char *name); 123void fatal (const char *format, const char *arg); 124void error (const char *format, const char *arg); 125void *xmalloc (), *xrealloc (); 126static char *concat3 (const char *, const char *, const char *); 127 128int 129main (int argc, char **argv) 130{ 131 int i; 132 133#ifdef HAVE_SETLOCALE 134 /* Set locale via LC_ALL. */ 135 setlocale (LC_ALL, ""); 136#endif 137 138 /* Set the text message domain. */ 139 bindtextdomain (PACKAGE, LOCALEDIR); 140 textdomain (PACKAGE); 141 142 /* In case we write to a redirected stdout that fails. */ 143 /* not ready atexit (close_stdout); */ 144 145 /* Describe the kind of sorting to do. */ 146 /* The first keyfield uses the first braced field and folds case. */ 147 keyfields[0].braced = 1; 148 keyfields[0].fold_case = 1; 149 keyfields[0].endwords = -1; 150 keyfields[0].endchars = -1; 151 152 /* The second keyfield uses the second braced field, numerically. */ 153 keyfields[1].braced = 1; 154 keyfields[1].numeric = 1; 155 keyfields[1].startwords = 1; 156 keyfields[1].endwords = -1; 157 keyfields[1].endchars = -1; 158 159 /* The third keyfield (which is ignored while discarding duplicates) 160 compares the whole line. */ 161 keyfields[2].endwords = -1; 162 keyfields[2].endchars = -1; 163 164 decode_command (argc, argv); 165 166 /* Process input files completely, one by one. */ 167 168 for (i = 0; i < num_infiles; i++) 169 { 170 int desc; 171 off_t ptr; 172 char *outfile; 173 struct stat instat; 174 175 desc = open (infiles[i], O_RDONLY, 0); 176 if (desc < 0) 177 pfatal_with_name (infiles[i]); 178 179 if (stat (infiles[i], &instat)) 180 pfatal_with_name (infiles[i]); 181 if (S_ISDIR (instat.st_mode)) 182 { 183#ifdef EISDIR 184 errno = EISDIR; 185#endif 186 pfatal_with_name (infiles[i]); 187 } 188 189 lseek (desc, (off_t) 0, SEEK_END); 190 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR); 191 192 close (desc); 193 194 outfile = outfiles[i]; 195 if (!outfile) 196 outfile = concat3 (infiles[i], "s", ""); 197 198 need_initials = 0; 199 first_initial = '\0'; 200 201 if (ptr != (int)ptr) 202 { 203 fprintf (stderr, "%s: %s: file too large\n", program_name, 204 infiles[i]); 205 xexit (1); 206 } 207 sort_in_core (infiles[i], (int)ptr, outfile); 208 } 209 210 xexit (0); 211 return 0; /* Avoid bogus warnings. */ 212} 213 214typedef struct 215{ 216 char *long_name; 217 char *short_name; 218 int *variable_ref; 219 int variable_value; 220 char *arg_name; 221 char *doc_string; 222} TEXINDEX_OPTION; 223 224TEXINDEX_OPTION texindex_options[] = { 225 { "--help", "-h", (int *)NULL, 0, (char *)NULL, 226 N_("display this help and exit") }, 227 { "--output", "-o", (int *)NULL, 0, "FILE", 228 N_("send output to FILE") }, 229 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL, 230 N_("display version information and exit") }, 231 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL } 232}; 233 234void 235usage (int result_value) 236{ 237 register int i; 238 FILE *f = result_value ? stderr : stdout; 239 240 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name); 241 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n")); 242 /* Avoid trigraph nonsense. */ 243 fprintf (f, 244_("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"), 245 '?', '?'); /* avoid trigraph in cat-id-tbl.c */ 246 fprintf (f, _("\nOptions:\n")); 247 248 for (i = 0; texindex_options[i].long_name; i++) 249 { 250 putc (' ', f); 251 252 if (texindex_options[i].short_name) 253 fprintf (f, "%s, ", texindex_options[i].short_name); 254 255 fprintf (f, "%s %s", 256 texindex_options[i].long_name, 257 texindex_options[i].arg_name 258 ? texindex_options[i].arg_name : ""); 259 260 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string)); 261 } 262 fputs (_("\n\ 263Email bug reports to bug-texinfo@gnu.org,\n\ 264general questions and discussion to help-texinfo@gnu.org.\n\ 265Texinfo home page: http://www.gnu.org/software/texinfo/"), f); 266 fputs ("\n", f); 267 268 xexit (result_value); 269} 270 271/* Decode the command line arguments to set the parameter variables 272 and set up the vector of keyfields and the vector of input files. */ 273 274void 275decode_command (int argc, char **argv) 276{ 277 int arg_index = 1; 278 char **ip; 279 char **op; 280 281 /* Allocate ARGC input files, which must be enough. */ 282 283 infiles = (char **) xmalloc (argc * sizeof (char *)); 284 outfiles = (char **) xmalloc (argc * sizeof (char *)); 285 ip = infiles; 286 op = outfiles; 287 288 while (arg_index < argc) 289 { 290 char *arg = argv[arg_index++]; 291 292 if (*arg == '-') 293 { 294 if (strcmp (arg, "--version") == 0) 295 { 296 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION); 297 puts (""); 298 puts ("Copyright (C) 2004 Free Software Foundation, Inc."); 299 printf (_("There is NO warranty. You may redistribute this software\n\ 300under the terms of the GNU General Public License.\n\ 301For more information about these matters, see the files named COPYING.\n")); 302 xexit (0); 303 } 304 else if ((strcmp (arg, "--keep") == 0) || 305 (strcmp (arg, "-k") == 0)) 306 { 307 /* Ignore, for backward compatibility */ 308 } 309 else if ((strcmp (arg, "--help") == 0) || 310 (strcmp (arg, "-h") == 0)) 311 { 312 usage (0); 313 } 314 else if ((strcmp (arg, "--output") == 0) || 315 (strcmp (arg, "-o") == 0)) 316 { 317 if (argv[arg_index] != (char *)NULL) 318 { 319 arg_index++; 320 if (op > outfiles) 321 *(op - 1) = argv[arg_index]; 322 } 323 else 324 usage (1); 325 } 326 else 327 usage (1); 328 } 329 else 330 { 331 *ip++ = arg; 332 *op++ = (char *)NULL; 333 } 334 } 335 336 /* Record number of keyfields and terminate list of filenames. */ 337 num_infiles = ip - infiles; 338 *ip = (char *)NULL; 339 if (num_infiles == 0) 340 usage (1); 341} 342 343/* Compare LINE1 and LINE2 according to the specified set of keyfields. */ 344 345int 346compare_full (const void *p1, const void *p2) 347{ 348 char **line1 = (char **) p1; 349 char **line2 = (char **) p2; 350 int i; 351 352 /* Compare using the first keyfield; 353 if that does not distinguish the lines, try the second keyfield; 354 and so on. */ 355 356 for (i = 0; i < num_keyfields; i++) 357 { 358 long length1, length2; 359 char *start1 = find_field (&keyfields[i], *line1, &length1); 360 char *start2 = find_field (&keyfields[i], *line2, &length2); 361 int tem = compare_field (&keyfields[i], start1, length1, 362 *line1 - text_base, 363 start2, length2, *line2 - text_base); 364 if (tem) 365 { 366 if (keyfields[i].reverse) 367 return -tem; 368 return tem; 369 } 370 } 371 372 return 0; /* Lines match exactly. */ 373} 374 375/* Compare LINE1 and LINE2, described by structures 376 in which the first keyfield is identified in advance. 377 For positional sorting, assumes that the order of the lines in core 378 reflects their nominal order. */ 379int 380compare_prepared (const void *p1, const void *p2) 381{ 382 struct lineinfo *line1 = (struct lineinfo *) p1; 383 struct lineinfo *line2 = (struct lineinfo *) p2; 384 int i; 385 int tem; 386 char *text1, *text2; 387 388 /* Compare using the first keyfield, which has been found for us already. */ 389 if (keyfields->positional) 390 { 391 if (line1->text - text_base > line2->text - text_base) 392 tem = 1; 393 else 394 tem = -1; 395 } 396 else if (keyfields->numeric) 397 tem = line1->key.number - line2->key.number; 398 else 399 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, 400 line2->key.text, line2->keylen, 0); 401 if (tem) 402 { 403 if (keyfields->reverse) 404 return -tem; 405 return tem; 406 } 407 408 text1 = line1->text; 409 text2 = line2->text; 410 411 /* Compare using the second keyfield; 412 if that does not distinguish the lines, try the third keyfield; 413 and so on. */ 414 415 for (i = 1; i < num_keyfields; i++) 416 { 417 long length1, length2; 418 char *start1 = find_field (&keyfields[i], text1, &length1); 419 char *start2 = find_field (&keyfields[i], text2, &length2); 420 int tem = compare_field (&keyfields[i], start1, length1, 421 text1 - text_base, 422 start2, length2, text2 - text_base); 423 if (tem) 424 { 425 if (keyfields[i].reverse) 426 return -tem; 427 return tem; 428 } 429 } 430 431 return 0; /* Lines match exactly. */ 432} 433 434/* Like compare_full but more general. 435 You can pass any strings, and you can say how many keyfields to use. 436 POS1 and POS2 should indicate the nominal positional ordering of 437 the two lines in the input. */ 438 439int 440compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields) 441{ 442 int i; 443 444 /* Compare using the first keyfield; 445 if that does not distinguish the lines, try the second keyfield; 446 and so on. */ 447 448 for (i = 0; i < use_keyfields; i++) 449 { 450 long length1, length2; 451 char *start1 = find_field (&keyfields[i], str1, &length1); 452 char *start2 = find_field (&keyfields[i], str2, &length2); 453 int tem = compare_field (&keyfields[i], start1, length1, pos1, 454 start2, length2, pos2); 455 if (tem) 456 { 457 if (keyfields[i].reverse) 458 return -tem; 459 return tem; 460 } 461 } 462 463 return 0; /* Lines match exactly. */ 464} 465 466/* Find the start and length of a field in STR according to KEYFIELD. 467 A pointer to the starting character is returned, and the length 468 is stored into the int that LENGTHPTR points to. */ 469 470char * 471find_field (struct keyfield *keyfield, char *str, long int *lengthptr) 472{ 473 char *start; 474 char *end; 475 char *(*fun) (); 476 477 if (keyfield->braced) 478 fun = find_braced_pos; 479 else 480 fun = find_pos; 481 482 start = (*fun) (str, keyfield->startwords, keyfield->startchars, 483 keyfield->ignore_blanks); 484 if (keyfield->endwords < 0) 485 { 486 if (keyfield->braced) 487 end = find_braced_end (start); 488 else 489 { 490 end = start; 491 while (*end && *end != '\n') 492 end++; 493 } 494 } 495 else 496 { 497 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); 498 if (end - str < start - str) 499 end = start; 500 } 501 *lengthptr = end - start; 502 return start; 503} 504 505/* Return a pointer to a specified place within STR, 506 skipping (from the beginning) WORDS words and then CHARS chars. 507 If IGNORE_BLANKS is nonzero, we skip all blanks 508 after finding the specified word. */ 509 510char * 511find_pos (char *str, int words, int chars, int ignore_blanks) 512{ 513 int i; 514 char *p = str; 515 516 for (i = 0; i < words; i++) 517 { 518 char c; 519 /* Find next bunch of nonblanks and skip them. */ 520 while ((c = *p) == ' ' || c == '\t') 521 p++; 522 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) 523 p++; 524 if (!*p || *p == '\n') 525 return p; 526 } 527 528 while (*p == ' ' || *p == '\t') 529 p++; 530 531 for (i = 0; i < chars; i++) 532 { 533 if (!*p || *p == '\n') 534 break; 535 p++; 536 } 537 return p; 538} 539 540/* Like find_pos but assumes that each field is surrounded by braces 541 and that braces within fields are balanced. */ 542 543char * 544find_braced_pos (char *str, int words, int chars, int ignore_blanks) 545{ 546 int i; 547 int bracelevel; 548 char *p = str; 549 char c; 550 551 for (i = 0; i < words; i++) 552 { 553 bracelevel = 1; 554 while ((c = *p++) != '{' && c != '\n' && c) 555 /* Do nothing. */ ; 556 if (c != '{') 557 return p - 1; 558 while (bracelevel) 559 { 560 c = *p++; 561 if (c == '{') 562 bracelevel++; 563 if (c == '}') 564 bracelevel--; 565 if (c == 0 || c == '\n') 566 return p - 1; 567 } 568 } 569 570 while ((c = *p++) != '{' && c != '\n' && c) 571 /* Do nothing. */ ; 572 573 if (c != '{') 574 return p - 1; 575 576 if (ignore_blanks) 577 while ((c = *p) == ' ' || c == '\t') 578 p++; 579 580 for (i = 0; i < chars; i++) 581 { 582 if (!*p || *p == '\n') 583 break; 584 p++; 585 } 586 return p; 587} 588 589/* Find the end of the balanced-brace field which starts at STR. 590 The position returned is just before the closing brace. */ 591 592char * 593find_braced_end (char *str) 594{ 595 int bracelevel; 596 char *p = str; 597 char c; 598 599 bracelevel = 1; 600 while (bracelevel) 601 { 602 c = *p++; 603 if (c == '{') 604 bracelevel++; 605 if (c == '}') 606 bracelevel--; 607 if (c == 0 || c == '\n') 608 return p - 1; 609 } 610 return p - 1; 611} 612 613long 614find_value (char *start, long int length) 615{ 616 while (length != 0L) 617 { 618 if (isdigit (*start)) 619 return atol (start); 620 length--; 621 start++; 622 } 623 return 0l; 624} 625 626/* Vector used to translate characters for comparison. 627 This is how we make all alphanumerics follow all else, 628 and ignore case in the first sorting. */ 629int char_order[256]; 630 631void 632init_char_order (void) 633{ 634 int i; 635 for (i = 1; i < 256; i++) 636 char_order[i] = i; 637 638 for (i = '0'; i <= '9'; i++) 639 char_order[i] += 512; 640 641 for (i = 'a'; i <= 'z'; i++) 642 { 643 char_order[i] = 512 + i; 644 char_order[i + 'A' - 'a'] = 512 + i; 645 } 646} 647 648/* Compare two fields (each specified as a start pointer and a character count) 649 according to KEYFIELD. 650 The sign of the value reports the relation between the fields. */ 651 652int 653compare_field (struct keyfield *keyfield, char *start1, long int length1, 654 long int pos1, char *start2, long int length2, long int pos2) 655{ 656 if (keyfields->positional) 657 { 658 if (pos1 > pos2) 659 return 1; 660 else 661 return -1; 662 } 663 if (keyfield->numeric) 664 { 665 long value = find_value (start1, length1) - find_value (start2, length2); 666 if (value > 0) 667 return 1; 668 if (value < 0) 669 return -1; 670 return 0; 671 } 672 else 673 { 674 char *p1 = start1; 675 char *p2 = start2; 676 char *e1 = start1 + length1; 677 char *e2 = start2 + length2; 678 679 while (1) 680 { 681 int c1, c2; 682 683 if (p1 == e1) 684 c1 = 0; 685 else 686 c1 = *p1++; 687 if (p2 == e2) 688 c2 = 0; 689 else 690 c2 = *p2++; 691 692 if (char_order[c1] != char_order[c2]) 693 return char_order[c1] - char_order[c2]; 694 if (!c1) 695 break; 696 } 697 698 /* Strings are equal except possibly for case. */ 699 p1 = start1; 700 p2 = start2; 701 while (1) 702 { 703 int c1, c2; 704 705 if (p1 == e1) 706 c1 = 0; 707 else 708 c1 = *p1++; 709 if (p2 == e2) 710 c2 = 0; 711 else 712 c2 = *p2++; 713 714 if (c1 != c2) 715 /* Reverse sign here so upper case comes out last. */ 716 return c2 - c1; 717 if (!c1) 718 break; 719 } 720 721 return 0; 722 } 723} 724 725/* Sort INFILE, whose size is TOTAL, 726 assuming that is small enough to be done in-core, 727 then indexify it and send the output to OUTFILE (or to stdout). */ 728 729void 730sort_in_core (char *infile, int total, char *outfile) 731{ 732 char **nextline; 733 char *data = (char *) xmalloc (total + 1); 734 char *file_data; 735 long file_size; 736 int i; 737 FILE *ostream = stdout; 738 struct lineinfo *lineinfo; 739 740 /* Read the contents of the file into the moby array `data'. */ 741 742 int desc = open (infile, O_RDONLY, 0); 743 744 if (desc < 0) 745 fatal (_("failure reopening %s"), infile); 746 for (file_size = 0;;) 747 { 748 i = read (desc, data + file_size, total - file_size); 749 if (i <= 0) 750 break; 751 file_size += i; 752 } 753 file_data = data; 754 data[file_size] = 0; 755 756 close (desc); 757 758 if (file_size > 0 && data[0] != '\\' && data[0] != '@') 759 { 760 error (_("%s: not a texinfo index file"), infile); 761 return; 762 } 763 764 init_char_order (); 765 766 /* Sort routines want to know this address. */ 767 768 text_base = data; 769 770 /* Create the array of pointers to lines, with a default size 771 frequently enough. */ 772 773 nlines = total / 50; 774 if (!nlines) 775 nlines = 2; 776 linearray = (char **) xmalloc (nlines * sizeof (char *)); 777 778 /* `nextline' points to the next free slot in this array. 779 `nlines' is the allocated size. */ 780 781 nextline = linearray; 782 783 /* Parse the input file's data, and make entries for the lines. */ 784 785 nextline = parsefile (infile, nextline, file_data, file_size); 786 if (nextline == 0) 787 { 788 error (_("%s: not a texinfo index file"), infile); 789 return; 790 } 791 792 /* Sort the lines. */ 793 794 /* If we have enough space, find the first keyfield of each line in advance. 795 Make a `struct lineinfo' for each line, which records the keyfield 796 as well as the line, and sort them. */ 797 798 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo)); 799 800 if (lineinfo) 801 { 802 struct lineinfo *lp; 803 char **p; 804 805 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 806 { 807 lp->text = *p; 808 lp->key.text = find_field (keyfields, *p, &lp->keylen); 809 if (keyfields->numeric) 810 lp->key.number = find_value (lp->key.text, lp->keylen); 811 } 812 813 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), 814 compare_prepared); 815 816 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) 817 *p = lp->text; 818 819 free (lineinfo); 820 } 821 else 822 qsort (linearray, nextline - linearray, sizeof (char *), compare_full); 823 824 /* Open the output file. */ 825 826 if (outfile) 827 { 828 ostream = fopen (outfile, "w"); 829 if (!ostream) 830 pfatal_with_name (outfile); 831 } 832 833 writelines (linearray, nextline - linearray, ostream); 834 if (outfile) 835 fclose (ostream); 836 837 free (linearray); 838 free (data); 839} 840 841/* Parse an input string in core into lines. 842 DATA is the input string, and SIZE is its length. 843 Data goes in LINEARRAY starting at NEXTLINE. 844 The value returned is the first entry in LINEARRAY still unused. 845 Value 0 means input file contents are invalid. */ 846 847char ** 848parsefile (char *filename, char **nextline, char *data, long int size) 849{ 850 char *p, *end; 851 char **line = nextline; 852 853 p = data; 854 end = p + size; 855 *end = 0; 856 857 while (p != end) 858 { 859 if (p[0] != '\\' && p[0] != '@') 860 return 0; 861 862 *line = p; 863 864 /* Find the first letter of the first field of this line. If it 865 is different from the first letter of the first field of the 866 first line, we need initial headers in the output index. */ 867 while (*p && *p != '{') 868 p++; 869 if (p == end) 870 return 0; 871 p++; 872 if (first_initial) 873 { 874 if (first_initial != toupper (*p)) 875 need_initials = 1; 876 } 877 else 878 first_initial = toupper (*p); 879 880 while (*p && *p != '\n') 881 p++; 882 if (p != end) 883 p++; 884 885 line++; 886 if (line == linearray + nlines) 887 { 888 char **old = linearray; 889 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4)); 890 line += linearray - old; 891 } 892 } 893 894 return line; 895} 896 897/* Indexification is a filter applied to the sorted lines 898 as they are being written to the output file. 899 Multiple entries for the same name, with different page numbers, 900 get combined into a single entry with multiple page numbers. 901 The first braced field, which is used for sorting, is discarded. 902 However, its first character is examined, folded to lower case, 903 and if it is different from that in the previous line fed to us 904 a \initial line is written with one argument, the new initial. 905 906 If an entry has four braced fields, then the second and third 907 constitute primary and secondary names. 908 In this case, each change of primary name 909 generates a \primary line which contains only the primary name, 910 and in between these are \secondary lines which contain 911 just a secondary name and page numbers. */ 912 913/* The last primary name we wrote a \primary entry for. 914 If only one level of indexing is being done, this is the last name seen. */ 915char *lastprimary; 916/* Length of storage allocated for lastprimary. */ 917int lastprimarylength; 918 919/* Similar, for the secondary name. */ 920char *lastsecondary; 921int lastsecondarylength; 922 923/* Zero if we are not in the middle of writing an entry. 924 One if we have written the beginning of an entry but have not 925 yet written any page numbers into it. 926 Greater than one if we have written the beginning of an entry 927 plus at least one page number. */ 928int pending; 929 930/* The initial (for sorting purposes) of the last primary entry written. 931 When this changes, a \initial {c} line is written */ 932 933char *lastinitial; 934 935int lastinitiallength; 936 937/* When we need a string of length 1 for the value of lastinitial, 938 store it here. */ 939 940char lastinitial1[2]; 941 942/* Initialize static storage for writing an index. */ 943 944void 945init_index (void) 946{ 947 pending = 0; 948 lastinitial = lastinitial1; 949 lastinitial1[0] = 0; 950 lastinitial1[1] = 0; 951 lastinitiallength = 0; 952 lastprimarylength = 100; 953 lastprimary = (char *) xmalloc (lastprimarylength + 1); 954 memset (lastprimary, '\0', lastprimarylength + 1); 955 lastsecondarylength = 100; 956 lastsecondary = (char *) xmalloc (lastsecondarylength + 1); 957 memset (lastsecondary, '\0', lastsecondarylength + 1); 958} 959 960/* Indexify. Merge entries for the same name, 961 insert headers for each initial character, etc. */ 962 963void 964indexify (char *line, FILE *ostream) 965{ 966 char *primary, *secondary, *pagenumber; 967 int primarylength, secondarylength = 0, pagelength; 968 int nosecondary; 969 int initiallength; 970 char *initial; 971 char initial1[2]; 972 register char *p; 973 974 /* First, analyze the parts of the entry fed to us this time. */ 975 976 p = find_braced_pos (line, 0, 0, 0); 977 if (*p == '{') 978 { 979 initial = p; 980 /* Get length of inner pair of braces starting at `p', 981 including that inner pair of braces. */ 982 initiallength = find_braced_end (p + 1) + 1 - p; 983 } 984 else 985 { 986 initial = initial1; 987 initial1[0] = toupper (*p); 988 initial1[1] = 0; 989 initiallength = 1; 990 } 991 992 pagenumber = find_braced_pos (line, 1, 0, 0); 993 pagelength = find_braced_end (pagenumber) - pagenumber; 994 if (pagelength == 0) 995 fatal (_("No page number in %s"), line); 996 997 primary = find_braced_pos (line, 2, 0, 0); 998 primarylength = find_braced_end (primary) - primary; 999 1000 secondary = find_braced_pos (line, 3, 0, 0); 1001 nosecondary = !*secondary; 1002 if (!nosecondary) 1003 secondarylength = find_braced_end (secondary) - secondary; 1004 1005 /* If the primary is different from before, make a new primary entry. */ 1006 if (strncmp (primary, lastprimary, primarylength)) 1007 { 1008 /* Close off current secondary entry first, if one is open. */ 1009 if (pending) 1010 { 1011 fputs ("}\n", ostream); 1012 pending = 0; 1013 } 1014 1015 /* If this primary has a different initial, include an entry for 1016 the initial. */ 1017 if (need_initials && 1018 (initiallength != lastinitiallength || 1019 strncmp (initial, lastinitial, initiallength))) 1020 { 1021 fprintf (ostream, "\\initial {"); 1022 fwrite (initial, 1, initiallength, ostream); 1023 fputs ("}\n", ostream); 1024 if (initial == initial1) 1025 { 1026 lastinitial = lastinitial1; 1027 *lastinitial1 = *initial1; 1028 } 1029 else 1030 { 1031 lastinitial = initial; 1032 } 1033 lastinitiallength = initiallength; 1034 } 1035 1036 /* Make the entry for the primary. */ 1037 if (nosecondary) 1038 fputs ("\\entry {", ostream); 1039 else 1040 fputs ("\\primary {", ostream); 1041 fwrite (primary, primarylength, 1, ostream); 1042 if (nosecondary) 1043 { 1044 fputs ("}{", ostream); 1045 pending = 1; 1046 } 1047 else 1048 fputs ("}\n", ostream); 1049 1050 /* Record name of most recent primary. */ 1051 if (lastprimarylength < primarylength) 1052 { 1053 lastprimarylength = primarylength + 100; 1054 lastprimary = (char *) xrealloc (lastprimary, 1055 1 + lastprimarylength); 1056 } 1057 strncpy (lastprimary, primary, primarylength); 1058 lastprimary[primarylength] = 0; 1059 1060 /* There is no current secondary within this primary, now. */ 1061 lastsecondary[0] = 0; 1062 } 1063 1064 /* Should not have an entry with no subtopic following one with a 1065 subtopic. */ 1066 1067 if (nosecondary && *lastsecondary) 1068 error (_("entry %s follows an entry with a secondary name"), line); 1069 1070 /* Start a new secondary entry if necessary. */ 1071 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) 1072 { 1073 if (pending) 1074 { 1075 fputs ("}\n", ostream); 1076 pending = 0; 1077 } 1078 1079 /* Write the entry for the secondary. */ 1080 fputs ("\\secondary {", ostream); 1081 fwrite (secondary, secondarylength, 1, ostream); 1082 fputs ("}{", ostream); 1083 pending = 1; 1084 1085 /* Record name of most recent secondary. */ 1086 if (lastsecondarylength < secondarylength) 1087 { 1088 lastsecondarylength = secondarylength + 100; 1089 lastsecondary = (char *) xrealloc (lastsecondary, 1090 1 + lastsecondarylength); 1091 } 1092 strncpy (lastsecondary, secondary, secondarylength); 1093 lastsecondary[secondarylength] = 0; 1094 } 1095 1096 /* Here to add one more page number to the current entry. */ 1097 if (pending++ != 1) 1098 fputs (", ", ostream); /* Punctuate first, if this is not the first. */ 1099 fwrite (pagenumber, pagelength, 1, ostream); 1100} 1101 1102/* Close out any unfinished output entry. */ 1103 1104void 1105finish_index (FILE *ostream) 1106{ 1107 if (pending) 1108 fputs ("}\n", ostream); 1109 free (lastprimary); 1110 free (lastsecondary); 1111} 1112 1113/* Copy the lines in the sorted order. 1114 Each line is copied out of the input file it was found in. */ 1115 1116void 1117writelines (char **linearray, int nlines, FILE *ostream) 1118{ 1119 char **stop_line = linearray + nlines; 1120 char **next_line; 1121 1122 init_index (); 1123 1124 /* Output the text of the lines, and free the buffer space. */ 1125 1126 for (next_line = linearray; next_line != stop_line; next_line++) 1127 { 1128 /* Output the line only if distinct from previous one. */ 1129 if (next_line == linearray 1130 /* Compare previous line with this one, using only the 1131 explicitly specd keyfields. */ 1132 || compare_general (*(next_line - 1), *next_line, 0L, 0L, 1133 num_keyfields - 1)) 1134 { 1135 char *p = *next_line; 1136 char c; 1137 1138 while ((c = *p++) && c != '\n') 1139 /* Do nothing. */ ; 1140 *(p - 1) = 0; 1141 indexify (*next_line, ostream); 1142 } 1143 } 1144 1145 finish_index (ostream); 1146} 1147 1148/* Print error message and exit. */ 1149 1150void 1151fatal (const char *format, const char *arg) 1152{ 1153 error (format, arg); 1154 xexit (1); 1155} 1156 1157/* Print error message. FORMAT is printf control string, ARG is arg for it. */ 1158void 1159error (const char *format, const char *arg) 1160{ 1161 printf ("%s: ", program_name); 1162 printf (format, arg); 1163 if (format[strlen (format) -1] != '\n') 1164 printf ("\n"); 1165} 1166 1167void 1168perror_with_name (const char *name) 1169{ 1170 fprintf (stderr, "%s: ", program_name); 1171 perror (name); 1172} 1173 1174void 1175pfatal_with_name (const char *name) 1176{ 1177 perror_with_name (name); 1178 xexit (1); 1179} 1180 1181 1182/* Return a newly-allocated string concatenating S1, S2, and S3. */ 1183 1184static char * 1185concat3 (const char *s1, const char *s2, const char *s3) 1186{ 1187 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); 1188 char *result = (char *) xmalloc (len1 + len2 + len3 + 1); 1189 1190 strcpy (result, s1); 1191 strcpy (result + len1, s2); 1192 strcpy (result + len1 + len2, s3); 1193 *(result + len1 + len2 + len3) = 0; 1194 1195 return result; 1196} 1197