1/*	$NetBSD: texindex.c,v 1.1.1.7 2008/09/02 07:50:59 christos Exp $	*/
2
3/* texindex -- sort TeX index dribble output into an actual index.
4   Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5
6   Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7   2002, 2003, 2004 Free Software Foundation, Inc.
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 2, or (at your option)
12   any later version.
13
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software
21   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22
23#include "system.h"
24#include <getopt.h>
25
26static char *program_name = "texindex";
27
28#if defined (emacs)
29#  include "../src/config.h"
30/* Some s/os.h files redefine these. */
31#  undef read
32#  undef close
33#  undef write
34#  undef open
35#endif
36
37#if !defined (HAVE_MEMSET)
38#undef memset
39#define memset(ptr, ignore, count) bzero (ptr, count)
40#endif
41
42#if !defined (SEEK_SET)
43#  define SEEK_SET 0
44#  define SEEK_CUR 1
45#  define SEEK_END 2
46#endif /* !SEEK_SET */
47
48/* When sorting in core, this structure describes one line
49   and the position and length of its first keyfield.  */
50struct lineinfo
51{
52  char *text;           /* The actual text of the line. */
53  union {
54    char *text;         /* The start of the key (for textual comparison). */
55    long number;        /* The numeric value (for numeric comparison). */
56  } key;
57  long keylen;          /* Length of KEY field. */
58};
59
60/* This structure describes a field to use as a sort key. */
61struct keyfield
62{
63  int startwords;       /* Number of words to skip. */
64  int startchars;       /* Number of additional chars to skip. */
65  int endwords;         /* Number of words to ignore at end. */
66  int endchars;         /* Ditto for characters of last word. */
67  char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
68  char fold_case;       /* Non-zero means case doesn't matter. */
69  char reverse;         /* Non-zero means compare in reverse order. */
70  char numeric;         /* Non-zeros means field is ASCII numeric. */
71  char positional;      /* Sort according to file position. */
72  char braced;          /* Count balanced-braced groupings as fields. */
73};
74
75/* Vector of keyfields to use. */
76struct keyfield keyfields[3];
77
78/* Number of keyfields stored in that vector.  */
79int num_keyfields = 3;
80
81/* Vector of input file names, terminated with a null pointer. */
82char **infiles;
83
84/* Vector of corresponding output file names, or NULL, meaning default it
85   (add an `s' to the end). */
86char **outfiles;
87
88/* Length of `infiles'. */
89int num_infiles;
90
91/* Pointer to the array of pointers to lines being sorted. */
92char **linearray;
93
94/* The allocated length of `linearray'. */
95long nlines;
96
97/* During in-core sort, this points to the base of the data block
98   which contains all the lines of data.  */
99char *text_base;
100
101/* Initially 0; changed to 1 if we want initials in this index.  */
102int need_initials;
103
104/* Remembers the first initial letter seen in this index, so we can
105   determine whether we need initials in the sorted form.  */
106char first_initial;
107
108/* Forward declarations of functions in this file. */
109void decode_command (int argc, char **argv);
110void sort_in_core (char *infile, int total, char *outfile);
111char **parsefile (char *filename, char **nextline, char *data, long int size);
112char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
113char *find_pos (char *str, int words, int chars, int ignore_blanks);
114long find_value (char *start, long int length);
115char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
116char *find_braced_end (char *str);
117void writelines (char **linearray, int nlines, FILE *ostream);
118int compare_field (struct keyfield *keyfield, char *start1,
119                   long int length1, long int pos1, char *start2,
120                   long int length2, long int pos2);
121int compare_full (const void *, const void *);
122void pfatal_with_name (const char *name);
123void fatal (const char *format, const char *arg);
124void error (const char *format, const char *arg);
125void *xmalloc (), *xrealloc ();
126static char *concat3 (const char *, const char *, const char *);
127
128int
129main (int argc, char **argv)
130{
131  int i;
132
133#ifdef HAVE_SETLOCALE
134  /* Set locale via LC_ALL.  */
135  setlocale (LC_ALL, "");
136#endif
137
138  /* Set the text message domain.  */
139  bindtextdomain (PACKAGE, LOCALEDIR);
140  textdomain (PACKAGE);
141
142  /* In case we write to a redirected stdout that fails.  */
143  /* not ready atexit (close_stdout); */
144
145  /* Describe the kind of sorting to do. */
146  /* The first keyfield uses the first braced field and folds case. */
147  keyfields[0].braced = 1;
148  keyfields[0].fold_case = 1;
149  keyfields[0].endwords = -1;
150  keyfields[0].endchars = -1;
151
152  /* The second keyfield uses the second braced field, numerically. */
153  keyfields[1].braced = 1;
154  keyfields[1].numeric = 1;
155  keyfields[1].startwords = 1;
156  keyfields[1].endwords = -1;
157  keyfields[1].endchars = -1;
158
159  /* The third keyfield (which is ignored while discarding duplicates)
160     compares the whole line. */
161  keyfields[2].endwords = -1;
162  keyfields[2].endchars = -1;
163
164  decode_command (argc, argv);
165
166  /* Process input files completely, one by one.  */
167
168  for (i = 0; i < num_infiles; i++)
169    {
170      int desc;
171      off_t ptr;
172      char *outfile;
173      struct stat instat;
174
175      desc = open (infiles[i], O_RDONLY, 0);
176      if (desc < 0)
177        pfatal_with_name (infiles[i]);
178
179      if (stat (infiles[i], &instat))
180        pfatal_with_name (infiles[i]);
181      if (S_ISDIR (instat.st_mode))
182        {
183#ifdef EISDIR
184          errno = EISDIR;
185#endif
186          pfatal_with_name (infiles[i]);
187        }
188
189      lseek (desc, (off_t) 0, SEEK_END);
190      ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
191
192      close (desc);
193
194      outfile = outfiles[i];
195      if (!outfile)
196        outfile = concat3 (infiles[i], "s", "");
197
198      need_initials = 0;
199      first_initial = '\0';
200
201      if (ptr != (int)ptr)
202	{
203	  fprintf (stderr, "%s: %s: file too large\n", program_name,
204		   infiles[i]);
205	  xexit (1);
206	}
207      sort_in_core (infiles[i], (int)ptr, outfile);
208    }
209
210  xexit (0);
211  return 0; /* Avoid bogus warnings.  */
212}
213
214typedef struct
215{
216  char *long_name;
217  char *short_name;
218  int *variable_ref;
219  int variable_value;
220  char *arg_name;
221  char *doc_string;
222} TEXINDEX_OPTION;
223
224TEXINDEX_OPTION texindex_options[] = {
225  { "--help", "-h", (int *)NULL, 0, (char *)NULL,
226      N_("display this help and exit") },
227  { "--output", "-o", (int *)NULL, 0, "FILE",
228      N_("send output to FILE") },
229  { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
230      N_("display version information and exit") },
231  { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
232};
233
234void
235usage (int result_value)
236{
237  register int i;
238  FILE *f = result_value ? stderr : stdout;
239
240  fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
241  fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
242  /* Avoid trigraph nonsense.  */
243  fprintf (f,
244_("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
245           '?', '?'); /* avoid trigraph in cat-id-tbl.c */
246  fprintf (f, _("\nOptions:\n"));
247
248  for (i = 0; texindex_options[i].long_name; i++)
249    {
250      putc (' ', f);
251
252      if (texindex_options[i].short_name)
253        fprintf (f, "%s, ", texindex_options[i].short_name);
254
255      fprintf (f, "%s %s",
256               texindex_options[i].long_name,
257               texindex_options[i].arg_name
258               ? texindex_options[i].arg_name : "");
259
260      fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
261    }
262  fputs (_("\n\
263Email bug reports to bug-texinfo@gnu.org,\n\
264general questions and discussion to help-texinfo@gnu.org.\n\
265Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
266  fputs ("\n", f);
267
268  xexit (result_value);
269}
270
271/* Decode the command line arguments to set the parameter variables
272   and set up the vector of keyfields and the vector of input files. */
273
274void
275decode_command (int argc, char **argv)
276{
277  int arg_index = 1;
278  char **ip;
279  char **op;
280
281  /* Allocate ARGC input files, which must be enough.  */
282
283  infiles = (char **) xmalloc (argc * sizeof (char *));
284  outfiles = (char **) xmalloc (argc * sizeof (char *));
285  ip = infiles;
286  op = outfiles;
287
288  while (arg_index < argc)
289    {
290      char *arg = argv[arg_index++];
291
292      if (*arg == '-')
293        {
294          if (strcmp (arg, "--version") == 0)
295            {
296              printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
297              puts ("");
298              puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
299              printf (_("There is NO warranty.  You may redistribute this software\n\
300under the terms of the GNU General Public License.\n\
301For more information about these matters, see the files named COPYING.\n"));
302              xexit (0);
303            }
304          else if ((strcmp (arg, "--keep") == 0) ||
305                   (strcmp (arg, "-k") == 0))
306            {
307	      /* Ignore, for backward compatibility */
308            }
309          else if ((strcmp (arg, "--help") == 0) ||
310                   (strcmp (arg, "-h") == 0))
311            {
312              usage (0);
313            }
314          else if ((strcmp (arg, "--output") == 0) ||
315                   (strcmp (arg, "-o") == 0))
316            {
317              if (argv[arg_index] != (char *)NULL)
318                {
319                  arg_index++;
320                  if (op > outfiles)
321                    *(op - 1) = argv[arg_index];
322                }
323              else
324                usage (1);
325            }
326          else
327            usage (1);
328        }
329      else
330        {
331          *ip++ = arg;
332          *op++ = (char *)NULL;
333        }
334    }
335
336  /* Record number of keyfields and terminate list of filenames. */
337  num_infiles = ip - infiles;
338  *ip = (char *)NULL;
339  if (num_infiles == 0)
340    usage (1);
341}
342
343/* Compare LINE1 and LINE2 according to the specified set of keyfields. */
344
345int
346compare_full (const void *p1, const void *p2)
347{
348  char **line1 = (char **) p1;
349  char **line2 = (char **) p2;
350  int i;
351
352  /* Compare using the first keyfield;
353     if that does not distinguish the lines, try the second keyfield;
354     and so on. */
355
356  for (i = 0; i < num_keyfields; i++)
357    {
358      long length1, length2;
359      char *start1 = find_field (&keyfields[i], *line1, &length1);
360      char *start2 = find_field (&keyfields[i], *line2, &length2);
361      int tem = compare_field (&keyfields[i], start1, length1,
362                               *line1 - text_base,
363                               start2, length2, *line2 - text_base);
364      if (tem)
365        {
366          if (keyfields[i].reverse)
367            return -tem;
368          return tem;
369        }
370    }
371
372  return 0;                     /* Lines match exactly. */
373}
374
375/* Compare LINE1 and LINE2, described by structures
376   in which the first keyfield is identified in advance.
377   For positional sorting, assumes that the order of the lines in core
378   reflects their nominal order.  */
379int
380compare_prepared (const void *p1, const void *p2)
381{
382  struct lineinfo *line1 = (struct lineinfo *) p1;
383  struct lineinfo *line2 = (struct lineinfo *) p2;
384  int i;
385  int tem;
386  char *text1, *text2;
387
388  /* Compare using the first keyfield, which has been found for us already. */
389  if (keyfields->positional)
390    {
391      if (line1->text - text_base > line2->text - text_base)
392        tem = 1;
393      else
394        tem = -1;
395    }
396  else if (keyfields->numeric)
397    tem = line1->key.number - line2->key.number;
398  else
399    tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
400                         line2->key.text, line2->keylen, 0);
401  if (tem)
402    {
403      if (keyfields->reverse)
404        return -tem;
405      return tem;
406    }
407
408  text1 = line1->text;
409  text2 = line2->text;
410
411  /* Compare using the second keyfield;
412     if that does not distinguish the lines, try the third keyfield;
413     and so on. */
414
415  for (i = 1; i < num_keyfields; i++)
416    {
417      long length1, length2;
418      char *start1 = find_field (&keyfields[i], text1, &length1);
419      char *start2 = find_field (&keyfields[i], text2, &length2);
420      int tem = compare_field (&keyfields[i], start1, length1,
421                               text1 - text_base,
422                               start2, length2, text2 - text_base);
423      if (tem)
424        {
425          if (keyfields[i].reverse)
426            return -tem;
427          return tem;
428        }
429    }
430
431  return 0;                     /* Lines match exactly. */
432}
433
434/* Like compare_full but more general.
435   You can pass any strings, and you can say how many keyfields to use.
436   POS1 and POS2 should indicate the nominal positional ordering of
437   the two lines in the input.  */
438
439int
440compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
441{
442  int i;
443
444  /* Compare using the first keyfield;
445     if that does not distinguish the lines, try the second keyfield;
446     and so on. */
447
448  for (i = 0; i < use_keyfields; i++)
449    {
450      long length1, length2;
451      char *start1 = find_field (&keyfields[i], str1, &length1);
452      char *start2 = find_field (&keyfields[i], str2, &length2);
453      int tem = compare_field (&keyfields[i], start1, length1, pos1,
454                               start2, length2, pos2);
455      if (tem)
456        {
457          if (keyfields[i].reverse)
458            return -tem;
459          return tem;
460        }
461    }
462
463  return 0;                     /* Lines match exactly. */
464}
465
466/* Find the start and length of a field in STR according to KEYFIELD.
467   A pointer to the starting character is returned, and the length
468   is stored into the int that LENGTHPTR points to.  */
469
470char *
471find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
472{
473  char *start;
474  char *end;
475  char *(*fun) ();
476
477  if (keyfield->braced)
478    fun = find_braced_pos;
479  else
480    fun = find_pos;
481
482  start = (*fun) (str, keyfield->startwords, keyfield->startchars,
483                  keyfield->ignore_blanks);
484  if (keyfield->endwords < 0)
485    {
486      if (keyfield->braced)
487        end = find_braced_end (start);
488      else
489        {
490          end = start;
491          while (*end && *end != '\n')
492            end++;
493        }
494    }
495  else
496    {
497      end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
498      if (end - str < start - str)
499        end = start;
500    }
501  *lengthptr = end - start;
502  return start;
503}
504
505/* Return a pointer to a specified place within STR,
506   skipping (from the beginning) WORDS words and then CHARS chars.
507   If IGNORE_BLANKS is nonzero, we skip all blanks
508   after finding the specified word.  */
509
510char *
511find_pos (char *str, int words, int chars, int ignore_blanks)
512{
513  int i;
514  char *p = str;
515
516  for (i = 0; i < words; i++)
517    {
518      char c;
519      /* Find next bunch of nonblanks and skip them. */
520      while ((c = *p) == ' ' || c == '\t')
521        p++;
522      while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
523        p++;
524      if (!*p || *p == '\n')
525        return p;
526    }
527
528  while (*p == ' ' || *p == '\t')
529    p++;
530
531  for (i = 0; i < chars; i++)
532    {
533      if (!*p || *p == '\n')
534        break;
535      p++;
536    }
537  return p;
538}
539
540/* Like find_pos but assumes that each field is surrounded by braces
541   and that braces within fields are balanced. */
542
543char *
544find_braced_pos (char *str, int words, int chars, int ignore_blanks)
545{
546  int i;
547  int bracelevel;
548  char *p = str;
549  char c;
550
551  for (i = 0; i < words; i++)
552    {
553      bracelevel = 1;
554      while ((c = *p++) != '{' && c != '\n' && c)
555        /* Do nothing. */ ;
556      if (c != '{')
557        return p - 1;
558      while (bracelevel)
559        {
560          c = *p++;
561          if (c == '{')
562            bracelevel++;
563          if (c == '}')
564            bracelevel--;
565          if (c == 0 || c == '\n')
566            return p - 1;
567        }
568    }
569
570  while ((c = *p++) != '{' && c != '\n' && c)
571    /* Do nothing. */ ;
572
573  if (c != '{')
574    return p - 1;
575
576  if (ignore_blanks)
577    while ((c = *p) == ' ' || c == '\t')
578      p++;
579
580  for (i = 0; i < chars; i++)
581    {
582      if (!*p || *p == '\n')
583        break;
584      p++;
585    }
586  return p;
587}
588
589/* Find the end of the balanced-brace field which starts at STR.
590   The position returned is just before the closing brace. */
591
592char *
593find_braced_end (char *str)
594{
595  int bracelevel;
596  char *p = str;
597  char c;
598
599  bracelevel = 1;
600  while (bracelevel)
601    {
602      c = *p++;
603      if (c == '{')
604        bracelevel++;
605      if (c == '}')
606        bracelevel--;
607      if (c == 0 || c == '\n')
608        return p - 1;
609    }
610  return p - 1;
611}
612
613long
614find_value (char *start, long int length)
615{
616  while (length != 0L)
617    {
618      if (isdigit (*start))
619        return atol (start);
620      length--;
621      start++;
622    }
623  return 0l;
624}
625
626/* Vector used to translate characters for comparison.
627   This is how we make all alphanumerics follow all else,
628   and ignore case in the first sorting.  */
629int char_order[256];
630
631void
632init_char_order (void)
633{
634  int i;
635  for (i = 1; i < 256; i++)
636    char_order[i] = i;
637
638  for (i = '0'; i <= '9'; i++)
639    char_order[i] += 512;
640
641  for (i = 'a'; i <= 'z'; i++)
642    {
643      char_order[i] = 512 + i;
644      char_order[i + 'A' - 'a'] = 512 + i;
645    }
646}
647
648/* Compare two fields (each specified as a start pointer and a character count)
649   according to KEYFIELD.
650   The sign of the value reports the relation between the fields. */
651
652int
653compare_field (struct keyfield *keyfield, char *start1, long int length1,
654               long int pos1, char *start2, long int length2, long int pos2)
655{
656  if (keyfields->positional)
657    {
658      if (pos1 > pos2)
659        return 1;
660      else
661        return -1;
662    }
663  if (keyfield->numeric)
664    {
665      long value = find_value (start1, length1) - find_value (start2, length2);
666      if (value > 0)
667        return 1;
668      if (value < 0)
669        return -1;
670      return 0;
671    }
672  else
673    {
674      char *p1 = start1;
675      char *p2 = start2;
676      char *e1 = start1 + length1;
677      char *e2 = start2 + length2;
678
679      while (1)
680        {
681          int c1, c2;
682
683          if (p1 == e1)
684            c1 = 0;
685          else
686            c1 = *p1++;
687          if (p2 == e2)
688            c2 = 0;
689          else
690            c2 = *p2++;
691
692          if (char_order[c1] != char_order[c2])
693            return char_order[c1] - char_order[c2];
694          if (!c1)
695            break;
696        }
697
698      /* Strings are equal except possibly for case.  */
699      p1 = start1;
700      p2 = start2;
701      while (1)
702        {
703          int c1, c2;
704
705          if (p1 == e1)
706            c1 = 0;
707          else
708            c1 = *p1++;
709          if (p2 == e2)
710            c2 = 0;
711          else
712            c2 = *p2++;
713
714          if (c1 != c2)
715            /* Reverse sign here so upper case comes out last.  */
716            return c2 - c1;
717          if (!c1)
718            break;
719        }
720
721      return 0;
722    }
723}
724
725/* Sort INFILE, whose size is TOTAL,
726   assuming that is small enough to be done in-core,
727   then indexify it and send the output to OUTFILE (or to stdout).  */
728
729void
730sort_in_core (char *infile, int total, char *outfile)
731{
732  char **nextline;
733  char *data = (char *) xmalloc (total + 1);
734  char *file_data;
735  long file_size;
736  int i;
737  FILE *ostream = stdout;
738  struct lineinfo *lineinfo;
739
740  /* Read the contents of the file into the moby array `data'. */
741
742  int desc = open (infile, O_RDONLY, 0);
743
744  if (desc < 0)
745    fatal (_("failure reopening %s"), infile);
746  for (file_size = 0;;)
747    {
748      i = read (desc, data + file_size, total - file_size);
749      if (i <= 0)
750        break;
751      file_size += i;
752    }
753  file_data = data;
754  data[file_size] = 0;
755
756  close (desc);
757
758  if (file_size > 0 && data[0] != '\\' && data[0] != '@')
759    {
760      error (_("%s: not a texinfo index file"), infile);
761      return;
762    }
763
764  init_char_order ();
765
766  /* Sort routines want to know this address. */
767
768  text_base = data;
769
770  /* Create the array of pointers to lines, with a default size
771     frequently enough.  */
772
773  nlines = total / 50;
774  if (!nlines)
775    nlines = 2;
776  linearray = (char **) xmalloc (nlines * sizeof (char *));
777
778  /* `nextline' points to the next free slot in this array.
779     `nlines' is the allocated size.  */
780
781  nextline = linearray;
782
783  /* Parse the input file's data, and make entries for the lines.  */
784
785  nextline = parsefile (infile, nextline, file_data, file_size);
786  if (nextline == 0)
787    {
788      error (_("%s: not a texinfo index file"), infile);
789      return;
790    }
791
792  /* Sort the lines. */
793
794  /* If we have enough space, find the first keyfield of each line in advance.
795     Make a `struct lineinfo' for each line, which records the keyfield
796     as well as the line, and sort them.  */
797
798  lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
799
800  if (lineinfo)
801    {
802      struct lineinfo *lp;
803      char **p;
804
805      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
806        {
807          lp->text = *p;
808          lp->key.text = find_field (keyfields, *p, &lp->keylen);
809          if (keyfields->numeric)
810            lp->key.number = find_value (lp->key.text, lp->keylen);
811        }
812
813      qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
814             compare_prepared);
815
816      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
817        *p = lp->text;
818
819      free (lineinfo);
820    }
821  else
822    qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
823
824  /* Open the output file. */
825
826  if (outfile)
827    {
828      ostream = fopen (outfile, "w");
829      if (!ostream)
830        pfatal_with_name (outfile);
831    }
832
833  writelines (linearray, nextline - linearray, ostream);
834  if (outfile)
835    fclose (ostream);
836
837  free (linearray);
838  free (data);
839}
840
841/* Parse an input string in core into lines.
842   DATA is the input string, and SIZE is its length.
843   Data goes in LINEARRAY starting at NEXTLINE.
844   The value returned is the first entry in LINEARRAY still unused.
845   Value 0 means input file contents are invalid.  */
846
847char **
848parsefile (char *filename, char **nextline, char *data, long int size)
849{
850  char *p, *end;
851  char **line = nextline;
852
853  p = data;
854  end = p + size;
855  *end = 0;
856
857  while (p != end)
858    {
859      if (p[0] != '\\' && p[0] != '@')
860        return 0;
861
862      *line = p;
863
864      /* Find the first letter of the first field of this line.  If it
865         is different from the first letter of the first field of the
866         first line, we need initial headers in the output index.  */
867      while (*p && *p != '{')
868        p++;
869      if (p == end)
870        return 0;
871      p++;
872      if (first_initial)
873        {
874          if (first_initial != toupper (*p))
875            need_initials = 1;
876        }
877      else
878        first_initial = toupper (*p);
879
880      while (*p && *p != '\n')
881        p++;
882      if (p != end)
883        p++;
884
885      line++;
886      if (line == linearray + nlines)
887        {
888          char **old = linearray;
889          linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
890          line += linearray - old;
891        }
892    }
893
894  return line;
895}
896
897/* Indexification is a filter applied to the sorted lines
898   as they are being written to the output file.
899   Multiple entries for the same name, with different page numbers,
900   get combined into a single entry with multiple page numbers.
901   The first braced field, which is used for sorting, is discarded.
902   However, its first character is examined, folded to lower case,
903   and if it is different from that in the previous line fed to us
904   a \initial line is written with one argument, the new initial.
905
906   If an entry has four braced fields, then the second and third
907   constitute primary and secondary names.
908   In this case, each change of primary name
909   generates a \primary line which contains only the primary name,
910   and in between these are \secondary lines which contain
911   just a secondary name and page numbers. */
912
913/* The last primary name we wrote a \primary entry for.
914   If only one level of indexing is being done, this is the last name seen. */
915char *lastprimary;
916/* Length of storage allocated for lastprimary. */
917int lastprimarylength;
918
919/* Similar, for the secondary name. */
920char *lastsecondary;
921int lastsecondarylength;
922
923/* Zero if we are not in the middle of writing an entry.
924   One if we have written the beginning of an entry but have not
925   yet written any page numbers into it.
926   Greater than one if we have written the beginning of an entry
927   plus at least one page number. */
928int pending;
929
930/* The initial (for sorting purposes) of the last primary entry written.
931   When this changes, a \initial {c} line is written */
932
933char *lastinitial;
934
935int lastinitiallength;
936
937/* When we need a string of length 1 for the value of lastinitial,
938   store it here.  */
939
940char lastinitial1[2];
941
942/* Initialize static storage for writing an index. */
943
944void
945init_index (void)
946{
947  pending = 0;
948  lastinitial = lastinitial1;
949  lastinitial1[0] = 0;
950  lastinitial1[1] = 0;
951  lastinitiallength = 0;
952  lastprimarylength = 100;
953  lastprimary = (char *) xmalloc (lastprimarylength + 1);
954  memset (lastprimary, '\0', lastprimarylength + 1);
955  lastsecondarylength = 100;
956  lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
957  memset (lastsecondary, '\0', lastsecondarylength + 1);
958}
959
960/* Indexify.  Merge entries for the same name,
961   insert headers for each initial character, etc.  */
962
963void
964indexify (char *line, FILE *ostream)
965{
966  char *primary, *secondary, *pagenumber;
967  int primarylength, secondarylength = 0, pagelength;
968  int nosecondary;
969  int initiallength;
970  char *initial;
971  char initial1[2];
972  register char *p;
973
974  /* First, analyze the parts of the entry fed to us this time. */
975
976  p = find_braced_pos (line, 0, 0, 0);
977  if (*p == '{')
978    {
979      initial = p;
980      /* Get length of inner pair of braces starting at `p',
981         including that inner pair of braces.  */
982      initiallength = find_braced_end (p + 1) + 1 - p;
983    }
984  else
985    {
986      initial = initial1;
987      initial1[0] = toupper (*p);
988      initial1[1] = 0;
989      initiallength = 1;
990    }
991
992  pagenumber = find_braced_pos (line, 1, 0, 0);
993  pagelength = find_braced_end (pagenumber) - pagenumber;
994  if (pagelength == 0)
995    fatal (_("No page number in %s"), line);
996
997  primary = find_braced_pos (line, 2, 0, 0);
998  primarylength = find_braced_end (primary) - primary;
999
1000  secondary = find_braced_pos (line, 3, 0, 0);
1001  nosecondary = !*secondary;
1002  if (!nosecondary)
1003    secondarylength = find_braced_end (secondary) - secondary;
1004
1005  /* If the primary is different from before, make a new primary entry. */
1006  if (strncmp (primary, lastprimary, primarylength))
1007    {
1008      /* Close off current secondary entry first, if one is open. */
1009      if (pending)
1010        {
1011          fputs ("}\n", ostream);
1012          pending = 0;
1013        }
1014
1015      /* If this primary has a different initial, include an entry for
1016         the initial. */
1017      if (need_initials &&
1018          (initiallength != lastinitiallength ||
1019           strncmp (initial, lastinitial, initiallength)))
1020        {
1021          fprintf (ostream, "\\initial {");
1022          fwrite (initial, 1, initiallength, ostream);
1023          fputs ("}\n", ostream);
1024          if (initial == initial1)
1025            {
1026              lastinitial = lastinitial1;
1027              *lastinitial1 = *initial1;
1028            }
1029          else
1030            {
1031              lastinitial = initial;
1032            }
1033          lastinitiallength = initiallength;
1034        }
1035
1036      /* Make the entry for the primary.  */
1037      if (nosecondary)
1038        fputs ("\\entry {", ostream);
1039      else
1040        fputs ("\\primary {", ostream);
1041      fwrite (primary, primarylength, 1, ostream);
1042      if (nosecondary)
1043        {
1044          fputs ("}{", ostream);
1045          pending = 1;
1046        }
1047      else
1048        fputs ("}\n", ostream);
1049
1050      /* Record name of most recent primary. */
1051      if (lastprimarylength < primarylength)
1052        {
1053          lastprimarylength = primarylength + 100;
1054          lastprimary = (char *) xrealloc (lastprimary,
1055                                           1 + lastprimarylength);
1056        }
1057      strncpy (lastprimary, primary, primarylength);
1058      lastprimary[primarylength] = 0;
1059
1060      /* There is no current secondary within this primary, now. */
1061      lastsecondary[0] = 0;
1062    }
1063
1064  /* Should not have an entry with no subtopic following one with a
1065     subtopic. */
1066
1067  if (nosecondary && *lastsecondary)
1068    error (_("entry %s follows an entry with a secondary name"), line);
1069
1070  /* Start a new secondary entry if necessary. */
1071  if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1072    {
1073      if (pending)
1074        {
1075          fputs ("}\n", ostream);
1076          pending = 0;
1077        }
1078
1079      /* Write the entry for the secondary.  */
1080      fputs ("\\secondary {", ostream);
1081      fwrite (secondary, secondarylength, 1, ostream);
1082      fputs ("}{", ostream);
1083      pending = 1;
1084
1085      /* Record name of most recent secondary. */
1086      if (lastsecondarylength < secondarylength)
1087        {
1088          lastsecondarylength = secondarylength + 100;
1089          lastsecondary = (char *) xrealloc (lastsecondary,
1090                                             1 + lastsecondarylength);
1091        }
1092      strncpy (lastsecondary, secondary, secondarylength);
1093      lastsecondary[secondarylength] = 0;
1094    }
1095
1096  /* Here to add one more page number to the current entry. */
1097  if (pending++ != 1)
1098    fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
1099  fwrite (pagenumber, pagelength, 1, ostream);
1100}
1101
1102/* Close out any unfinished output entry. */
1103
1104void
1105finish_index (FILE *ostream)
1106{
1107  if (pending)
1108    fputs ("}\n", ostream);
1109  free (lastprimary);
1110  free (lastsecondary);
1111}
1112
1113/* Copy the lines in the sorted order.
1114   Each line is copied out of the input file it was found in. */
1115
1116void
1117writelines (char **linearray, int nlines, FILE *ostream)
1118{
1119  char **stop_line = linearray + nlines;
1120  char **next_line;
1121
1122  init_index ();
1123
1124  /* Output the text of the lines, and free the buffer space. */
1125
1126  for (next_line = linearray; next_line != stop_line; next_line++)
1127    {
1128      /* Output the line only if distinct from previous one.  */
1129      if (next_line == linearray
1130      /* Compare previous line with this one, using only the
1131         explicitly specd keyfields. */
1132          || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1133                              num_keyfields - 1))
1134        {
1135          char *p = *next_line;
1136          char c;
1137
1138          while ((c = *p++) && c != '\n')
1139            /* Do nothing. */ ;
1140          *(p - 1) = 0;
1141          indexify (*next_line, ostream);
1142        }
1143    }
1144
1145  finish_index (ostream);
1146}
1147
1148/* Print error message and exit.  */
1149
1150void
1151fatal (const char *format, const char *arg)
1152{
1153  error (format, arg);
1154  xexit (1);
1155}
1156
1157/* Print error message.  FORMAT is printf control string, ARG is arg for it. */
1158void
1159error (const char *format, const char *arg)
1160{
1161  printf ("%s: ", program_name);
1162  printf (format, arg);
1163  if (format[strlen (format) -1] != '\n')
1164    printf ("\n");
1165}
1166
1167void
1168perror_with_name (const char *name)
1169{
1170  fprintf (stderr, "%s: ", program_name);
1171  perror (name);
1172}
1173
1174void
1175pfatal_with_name (const char *name)
1176{
1177  perror_with_name (name);
1178  xexit (1);
1179}
1180
1181
1182/* Return a newly-allocated string concatenating S1, S2, and S3.  */
1183
1184static char *
1185concat3 (const char *s1, const char *s2, const char *s3)
1186{
1187  int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1188  char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1189
1190  strcpy (result, s1);
1191  strcpy (result + len1, s2);
1192  strcpy (result + len1 + len2, s3);
1193  *(result + len1 + len2 + len3) = 0;
1194
1195  return result;
1196}
1197