1114472Sru/* texindex -- sort TeX index dribble output into an actual index.
2146515Sru   $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $
321495Sjmacd
4114472Sru   Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
5146515Sru   2002, 2003, 2004 Free Software Foundation, Inc.
621495Sjmacd
721495Sjmacd   This program is free software; you can redistribute it and/or modify
821495Sjmacd   it under the terms of the GNU General Public License as published by
921495Sjmacd   the Free Software Foundation; either version 2, or (at your option)
1021495Sjmacd   any later version.
1121495Sjmacd
1221495Sjmacd   This program is distributed in the hope that it will be useful,
1321495Sjmacd   but WITHOUT ANY WARRANTY; without even the implied warranty of
1421495Sjmacd   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1521495Sjmacd   GNU General Public License for more details.
1621495Sjmacd
1721495Sjmacd   You should have received a copy of the GNU General Public License
1821495Sjmacd   along with this program; if not, write to the Free Software
1921495Sjmacd   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
2021495Sjmacd
2142660Smarkm#include "system.h"
2242660Smarkm#include <getopt.h>
2321495Sjmacd
2456160Srustatic char *program_name = "texindex";
2556160Sru
2621495Sjmacd#if defined (emacs)
2721495Sjmacd#  include "../src/config.h"
2821495Sjmacd/* Some s/os.h files redefine these. */
2921495Sjmacd#  undef read
3021495Sjmacd#  undef close
3121495Sjmacd#  undef write
3221495Sjmacd#  undef open
3321495Sjmacd#endif
3421495Sjmacd
3521495Sjmacd#if !defined (HAVE_MEMSET)
3621495Sjmacd#undef memset
3721495Sjmacd#define memset(ptr, ignore, count) bzero (ptr, count)
3821495Sjmacd#endif
3921495Sjmacd
40146515Sruchar *mktemp (char *);
4121495Sjmacd
4221495Sjmacd#if !defined (SEEK_SET)
4321495Sjmacd#  define SEEK_SET 0
4421495Sjmacd#  define SEEK_CUR 1
4521495Sjmacd#  define SEEK_END 2
4621495Sjmacd#endif /* !SEEK_SET */
4721495Sjmacd
48146515Srustruct linebuffer;
49146515Sru
5021495Sjmacd/* When sorting in core, this structure describes one line
5121495Sjmacd   and the position and length of its first keyfield.  */
5221495Sjmacdstruct lineinfo
5321495Sjmacd{
5442660Smarkm  char *text;           /* The actual text of the line. */
5521495Sjmacd  union {
5642660Smarkm    char *text;         /* The start of the key (for textual comparison). */
5742660Smarkm    long number;        /* The numeric value (for numeric comparison). */
5821495Sjmacd  } key;
5942660Smarkm  long keylen;          /* Length of KEY field. */
6021495Sjmacd};
6121495Sjmacd
6221495Sjmacd/* This structure describes a field to use as a sort key. */
6321495Sjmacdstruct keyfield
6421495Sjmacd{
6542660Smarkm  int startwords;       /* Number of words to skip. */
6642660Smarkm  int startchars;       /* Number of additional chars to skip. */
6742660Smarkm  int endwords;         /* Number of words to ignore at end. */
6842660Smarkm  int endchars;         /* Ditto for characters of last word. */
6942660Smarkm  char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
7042660Smarkm  char fold_case;       /* Non-zero means case doesn't matter. */
7142660Smarkm  char reverse;         /* Non-zero means compare in reverse order. */
7242660Smarkm  char numeric;         /* Non-zeros means field is ASCII numeric. */
7342660Smarkm  char positional;      /* Sort according to file position. */
7442660Smarkm  char braced;          /* Count balanced-braced groupings as fields. */
7521495Sjmacd};
7621495Sjmacd
7721495Sjmacd/* Vector of keyfields to use. */
7821495Sjmacdstruct keyfield keyfields[3];
7921495Sjmacd
8021495Sjmacd/* Number of keyfields stored in that vector.  */
8121495Sjmacdint num_keyfields = 3;
8221495Sjmacd
8321495Sjmacd/* Vector of input file names, terminated with a null pointer. */
8421495Sjmacdchar **infiles;
8521495Sjmacd
8621495Sjmacd/* Vector of corresponding output file names, or NULL, meaning default it
8721495Sjmacd   (add an `s' to the end). */
8821495Sjmacdchar **outfiles;
8921495Sjmacd
9021495Sjmacd/* Length of `infiles'. */
9121495Sjmacdint num_infiles;
9221495Sjmacd
9321495Sjmacd/* Pointer to the array of pointers to lines being sorted. */
9421495Sjmacdchar **linearray;
9521495Sjmacd
9621495Sjmacd/* The allocated length of `linearray'. */
9721495Sjmacdlong nlines;
9821495Sjmacd
9921495Sjmacd/* Directory to use for temporary files.  On Unix, it ends with a slash.  */
10021495Sjmacdchar *tempdir;
10121495Sjmacd
10221495Sjmacd/* Number of last temporary file.  */
10321495Sjmacdint tempcount;
10421495Sjmacd
10521495Sjmacd/* Number of last temporary file already deleted.
10621495Sjmacd   Temporary files are deleted by `flush_tempfiles' in order of creation.  */
10721495Sjmacdint last_deleted_tempcount;
10821495Sjmacd
10921495Sjmacd/* During in-core sort, this points to the base of the data block
11021495Sjmacd   which contains all the lines of data.  */
11121495Sjmacdchar *text_base;
11221495Sjmacd
113114472Sru/* Initially 0; changed to 1 if we want initials in this index.  */
114114472Sruint need_initials;
115114472Sru
116114472Sru/* Remembers the first initial letter seen in this index, so we can
117114472Sru   determine whether we need initials in the sorted form.  */
118114472Sruchar first_initial;
119114472Sru
12021495Sjmacd/* Additional command switches .*/
12121495Sjmacd
12221495Sjmacd/* Nonzero means do not delete tempfiles -- for debugging. */
12321495Sjmacdint keep_tempfiles;
12421495Sjmacd
12521495Sjmacd/* Forward declarations of functions in this file. */
126146515Sruvoid decode_command (int argc, char **argv);
127146515Sruvoid sort_in_core (char *infile, int total, char *outfile);
128146515Sruvoid sort_offline (char *infile, off_t total, char *outfile);
129146515Sruchar **parsefile (char *filename, char **nextline, char *data, long int size);
130146515Sruchar *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
131146515Sruchar *find_pos (char *str, int words, int chars, int ignore_blanks);
132146515Srulong find_value (char *start, long int length);
133146515Sruchar *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
134146515Sruchar *find_braced_end (char *str);
135146515Sruvoid writelines (char **linearray, int nlines, FILE *ostream);
136146515Sruint compare_field (struct keyfield *keyfield, char *start1,
137146515Sru                   long int length1, long int pos1, char *start2,
138146515Sru                   long int length2, long int pos2);
139146515Sruint compare_full (const void *, const void *);
140146515Srulong readline (struct linebuffer *linebuffer, FILE *stream);
141146515Sruint merge_files (char **infiles, int nfiles, char *outfile);
142146515Sruint merge_direct (char **infiles, int nfiles, char *outfile);
143146515Sruvoid pfatal_with_name (const char *name);
144146515Sruvoid fatal (const char *format, const char *arg);
145146515Sruvoid error (const char *format, const char *arg);
14621495Sjmacdvoid *xmalloc (), *xrealloc ();
147146515Sruchar *concat (char *s1, char *s2);
148146515Sruvoid flush_tempfiles (int to_count);
14921495Sjmacd
15021495Sjmacd#define MAX_IN_CORE_SORT 500000
15121495Sjmacd
15242660Smarkmint
153146515Srumain (int argc, char **argv)
15421495Sjmacd{
15521495Sjmacd  int i;
15621495Sjmacd
15721495Sjmacd  tempcount = 0;
15821495Sjmacd  last_deleted_tempcount = 0;
15921495Sjmacd
16042660Smarkm#ifdef HAVE_SETLOCALE
16142660Smarkm  /* Set locale via LC_ALL.  */
16242660Smarkm  setlocale (LC_ALL, "");
16342660Smarkm#endif
16442660Smarkm
16542660Smarkm  /* Set the text message domain.  */
16642660Smarkm  bindtextdomain (PACKAGE, LOCALEDIR);
16742660Smarkm  textdomain (PACKAGE);
16842660Smarkm
169114472Sru  /* In case we write to a redirected stdout that fails.  */
170114472Sru  /* not ready atexit (close_stdout); */
171116525Sru
17221495Sjmacd  /* Describe the kind of sorting to do. */
17321495Sjmacd  /* The first keyfield uses the first braced field and folds case. */
17421495Sjmacd  keyfields[0].braced = 1;
17521495Sjmacd  keyfields[0].fold_case = 1;
17621495Sjmacd  keyfields[0].endwords = -1;
17721495Sjmacd  keyfields[0].endchars = -1;
17821495Sjmacd
17921495Sjmacd  /* The second keyfield uses the second braced field, numerically. */
18021495Sjmacd  keyfields[1].braced = 1;
18121495Sjmacd  keyfields[1].numeric = 1;
18221495Sjmacd  keyfields[1].startwords = 1;
18321495Sjmacd  keyfields[1].endwords = -1;
18421495Sjmacd  keyfields[1].endchars = -1;
18521495Sjmacd
18621495Sjmacd  /* The third keyfield (which is ignored while discarding duplicates)
18721495Sjmacd     compares the whole line. */
18821495Sjmacd  keyfields[2].endwords = -1;
18921495Sjmacd  keyfields[2].endchars = -1;
19021495Sjmacd
19121495Sjmacd  decode_command (argc, argv);
19221495Sjmacd
19321495Sjmacd  /* Process input files completely, one by one.  */
19421495Sjmacd
19521495Sjmacd  for (i = 0; i < num_infiles; i++)
19621495Sjmacd    {
19721495Sjmacd      int desc;
19856160Sru      off_t ptr;
19921495Sjmacd      char *outfile;
20056160Sru      struct stat instat;
20121495Sjmacd
20221495Sjmacd      desc = open (infiles[i], O_RDONLY, 0);
20321495Sjmacd      if (desc < 0)
20442660Smarkm        pfatal_with_name (infiles[i]);
20556160Sru
20656160Sru      if (stat (infiles[i], &instat))
20756160Sru        pfatal_with_name (infiles[i]);
20856160Sru      if (S_ISDIR (instat.st_mode))
20956160Sru        {
21056160Sru#ifdef EISDIR
21156160Sru          errno = EISDIR;
21256160Sru#endif
21356160Sru          pfatal_with_name (infiles[i]);
21456160Sru        }
21556160Sru
21621495Sjmacd      lseek (desc, (off_t) 0, SEEK_END);
21756160Sru      ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
21821495Sjmacd
21921495Sjmacd      close (desc);
22021495Sjmacd
22121495Sjmacd      outfile = outfiles[i];
22221495Sjmacd      if (!outfile)
223116525Sru        outfile = concat (infiles[i], "s");
22421495Sjmacd
225114472Sru      need_initials = 0;
226114472Sru      first_initial = '\0';
227114472Sru
22821495Sjmacd      if (ptr < MAX_IN_CORE_SORT)
22942660Smarkm        /* Sort a small amount of data. */
230114472Sru        sort_in_core (infiles[i], (int)ptr, outfile);
23121495Sjmacd      else
23242660Smarkm        sort_offline (infiles[i], ptr, outfile);
23321495Sjmacd    }
23421495Sjmacd
23521495Sjmacd  flush_tempfiles (tempcount);
23656160Sru  xexit (0);
23742660Smarkm  return 0; /* Avoid bogus warnings.  */
23821495Sjmacd}
23921495Sjmacd
24021495Sjmacdtypedef struct
24121495Sjmacd{
24221495Sjmacd  char *long_name;
24321495Sjmacd  char *short_name;
24421495Sjmacd  int *variable_ref;
24521495Sjmacd  int variable_value;
24621495Sjmacd  char *arg_name;
24721495Sjmacd  char *doc_string;
24821495Sjmacd} TEXINDEX_OPTION;
24921495Sjmacd
25021495SjmacdTEXINDEX_OPTION texindex_options[] = {
25156160Sru  { "--help", "-h", (int *)NULL, 0, (char *)NULL,
25256160Sru      N_("display this help and exit") },
25321495Sjmacd  { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
25442660Smarkm      N_("keep temporary files around after processing") },
25521495Sjmacd  { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
25642660Smarkm      N_("do not keep temporary files around after processing (default)") },
25721495Sjmacd  { "--output", "-o", (int *)NULL, 0, "FILE",
25842660Smarkm      N_("send output to FILE") },
25921495Sjmacd  { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
26042660Smarkm      N_("display version information and exit") },
26121495Sjmacd  { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
26221495Sjmacd};
26321495Sjmacd
26421495Sjmacdvoid
265146515Sruusage (int result_value)
26621495Sjmacd{
26721495Sjmacd  register int i;
26821495Sjmacd  FILE *f = result_value ? stderr : stdout;
26921495Sjmacd
27042660Smarkm  fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
27142660Smarkm  fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
27221495Sjmacd  /* Avoid trigraph nonsense.  */
27356160Sru  fprintf (f,
27456160Sru_("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
27556160Sru           '?', '?'); /* avoid trigraph in cat-id-tbl.c */
27642660Smarkm  fprintf (f, _("\nOptions:\n"));
27721495Sjmacd
27821495Sjmacd  for (i = 0; texindex_options[i].long_name; i++)
27921495Sjmacd    {
28056160Sru      putc (' ', f);
28156160Sru
28221495Sjmacd      if (texindex_options[i].short_name)
28342660Smarkm        fprintf (f, "%s, ", texindex_options[i].short_name);
28421495Sjmacd
28521495Sjmacd      fprintf (f, "%s %s",
28642660Smarkm               texindex_options[i].long_name,
28742660Smarkm               texindex_options[i].arg_name
28821495Sjmacd               ? texindex_options[i].arg_name : "");
28921495Sjmacd
29042660Smarkm      fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
29121495Sjmacd    }
29256160Sru  fputs (_("\n\
29356160SruEmail bug reports to bug-texinfo@gnu.org,\n\
29456160Srugeneral questions and discussion to help-texinfo@gnu.org.\n\
295100513SruTexinfo home page: http://www.gnu.org/software/texinfo/"), f);
296100513Sru  fputs ("\n", f);
29721495Sjmacd
29856160Sru  xexit (result_value);
29921495Sjmacd}
30021495Sjmacd
30121495Sjmacd/* Decode the command line arguments to set the parameter variables
30221495Sjmacd   and set up the vector of keyfields and the vector of input files. */
30321495Sjmacd
30421495Sjmacdvoid
305146515Srudecode_command (int argc, char **argv)
30621495Sjmacd{
30721495Sjmacd  int arg_index = 1;
30821495Sjmacd  char **ip;
30921495Sjmacd  char **op;
31021495Sjmacd
31121495Sjmacd  /* Store default values into parameter variables. */
31221495Sjmacd
31321495Sjmacd  tempdir = getenv ("TMPDIR");
31421495Sjmacd  if (tempdir == NULL)
31556160Sru    tempdir = getenv ("TEMP");
31621495Sjmacd  if (tempdir == NULL)
31756160Sru    tempdir = getenv ("TMP");
31856160Sru  if (tempdir == NULL)
31956160Sru    tempdir = DEFAULT_TMPDIR;
32021495Sjmacd  else
321116525Sru    tempdir = concat (tempdir, "/");
32221495Sjmacd
32321495Sjmacd  keep_tempfiles = 0;
32421495Sjmacd
32521495Sjmacd  /* Allocate ARGC input files, which must be enough.  */
32621495Sjmacd
32721495Sjmacd  infiles = (char **) xmalloc (argc * sizeof (char *));
32821495Sjmacd  outfiles = (char **) xmalloc (argc * sizeof (char *));
32921495Sjmacd  ip = infiles;
33021495Sjmacd  op = outfiles;
33121495Sjmacd
33221495Sjmacd  while (arg_index < argc)
33321495Sjmacd    {
33421495Sjmacd      char *arg = argv[arg_index++];
33521495Sjmacd
33621495Sjmacd      if (*arg == '-')
33742660Smarkm        {
33842660Smarkm          if (strcmp (arg, "--version") == 0)
33942660Smarkm            {
34042660Smarkm              printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
34156160Sru              puts ("");
342146515Sru              puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
343146515Sru              printf (_("There is NO warranty.  You may redistribute this software\n\
34421495Sjmacdunder the terms of the GNU General Public License.\n\
345146515SruFor more information about these matters, see the files named COPYING.\n"));
34656160Sru              xexit (0);
34742660Smarkm            }
34842660Smarkm          else if ((strcmp (arg, "--keep") == 0) ||
34942660Smarkm                   (strcmp (arg, "-k") == 0))
35042660Smarkm            {
35142660Smarkm              keep_tempfiles = 1;
35242660Smarkm            }
35342660Smarkm          else if ((strcmp (arg, "--help") == 0) ||
35442660Smarkm                   (strcmp (arg, "-h") == 0))
35542660Smarkm            {
35642660Smarkm              usage (0);
35742660Smarkm            }
35842660Smarkm          else if ((strcmp (arg, "--output") == 0) ||
35942660Smarkm                   (strcmp (arg, "-o") == 0))
36042660Smarkm            {
36142660Smarkm              if (argv[arg_index] != (char *)NULL)
36242660Smarkm                {
36342660Smarkm                  arg_index++;
36442660Smarkm                  if (op > outfiles)
36542660Smarkm                    *(op - 1) = argv[arg_index];
36642660Smarkm                }
36742660Smarkm              else
36842660Smarkm                usage (1);
36942660Smarkm            }
37042660Smarkm          else
37142660Smarkm            usage (1);
37242660Smarkm        }
37321495Sjmacd      else
37442660Smarkm        {
37542660Smarkm          *ip++ = arg;
37642660Smarkm          *op++ = (char *)NULL;
37742660Smarkm        }
37821495Sjmacd    }
37921495Sjmacd
38021495Sjmacd  /* Record number of keyfields and terminate list of filenames. */
38121495Sjmacd  num_infiles = ip - infiles;
38221495Sjmacd  *ip = (char *)NULL;
38321495Sjmacd  if (num_infiles == 0)
38421495Sjmacd    usage (1);
38521495Sjmacd}
38621495Sjmacd
387154216Scpercivastatic char **tv;
388154216Scpercivastatic int tv_alloc;
389154216Scpercivastatic int tv_used;
390154216Scperciva
391154216Scpercivastatic int
392154216Scpercivafindtempname (char *tempname)
393154216Scperciva{
394154216Scperciva  int i;
395154216Scperciva
396154216Scperciva  for (i = 0; i < tv_used; i++)
397154216Scperciva    if (strcmp (tv[i], tempname) == 0)
398154216Scperciva	return (1);
399154216Scperciva  return (0);
400154216Scperciva}
401154216Scperciva
402114472Sru/* Return a name for temporary file COUNT. */
40321495Sjmacd
40456160Srustatic char *
405146515Srumaketempname (int count)
40621495Sjmacd{
407114472Sru  static char *tempbase = NULL;
408154216Scperciva  char *tempname;
40921495Sjmacd  char tempsuffix[10];
410154216Scperciva  int fd;
411114472Sru
412114472Sru  if (!tempbase)
413114472Sru    {
414116525Sru      tempbase = concat (tempdir, "txidxXXXXXX");
415114472Sru
416114472Sru      fd = mkstemp (tempbase);
417116525Sru      if (fd == -1)
418114472Sru        pfatal_with_name (tempbase);
419114472Sru    }
420114472Sru
42156160Sru  sprintf (tempsuffix, ".%d", count);
422154216Scperciva  tempname = concat (tempbase, tempsuffix);
423154216Scperciva  /*
424154216Scperciva   * The open logic becomes a bit convoluted. If open(2) fails due to EEXIST,
425154216Scperciva   * it's likely because somebody attempted to race us, or because we have
426154216Scperciva   * already created this file.
427154216Scperciva   */
428154216Scperciva  fd = open (tempname, O_CREAT|O_EXCL|O_WRONLY, 0600);
429154216Scperciva  if (fd == -1)
430154216Scperciva    {
431154216Scperciva	/*
432154216Scperciva	 * If errno is not EEXIST, then open failed for some other reason, so
433154216Scperciva	 * we should terminate. If errno == EEXIST AND we didn't create this
434154216Scperciva	 * file, terminate. Otherwise, it's safe to say that errno == EEXIST
435154216Scperciva	 * because we already created it, in this event, we can just return.
436154216Scperciva	 */
437154216Scperciva	if (errno != EEXIST ||
438154216Scperciva	  (errno == EEXIST && findtempname (tempname) == 0))
439154216Scperciva	  pfatal_with_name (tempname);
440154216Scperciva	return (tempname);
441154216Scperciva    }
442154216Scperciva  else if (fd > 0)
443154216Scperciva    {
444154216Scperciva	close (fd);
445154216Scperciva    }
446154216Scperciva  if (tv == NULL)
447154216Scperciva    {
448154216Scperciva	tv_alloc = 16;
449154216Scperciva	tv = calloc (tv_alloc, sizeof (char *));
450154216Scperciva	if (tv == NULL)
451154216Scperciva	  {
452154216Scperciva	    fprintf (stderr, "calloc failed\n");
453154216Scperciva	    exit (1);
454154216Scperciva	  }
455154216Scperciva    }
456154216Scperciva  else if (tv_used == tv_alloc)
457154216Scperciva    {
458154216Scperciva	tv_alloc += 4;
459154216Scperciva	tv = realloc (tv, tv_alloc * sizeof (char *));
460154216Scperciva	if (tv == NULL)
461154216Scperciva	  {
462154216Scperciva	    fprintf (stderr, "realloc failed");
463154216Scperciva	    exit (1);
464154216Scperciva	  }
465154216Scperciva    }
466154216Scperciva  tv[tv_used++] = strdup (tempname);
467154216Scperciva  return tempname;
46821495Sjmacd}
46921495Sjmacd
470114472Sru
47121495Sjmacd/* Delete all temporary files up to TO_COUNT. */
47221495Sjmacd
47321495Sjmacdvoid
474146515Sruflush_tempfiles (int to_count)
47521495Sjmacd{
47621495Sjmacd  if (keep_tempfiles)
47721495Sjmacd    return;
47821495Sjmacd  while (last_deleted_tempcount < to_count)
47921495Sjmacd    unlink (maketempname (++last_deleted_tempcount));
48021495Sjmacd}
48121495Sjmacd
48221495Sjmacd
48321495Sjmacd/* Compare LINE1 and LINE2 according to the specified set of keyfields. */
48421495Sjmacd
48521495Sjmacdint
486146515Srucompare_full (const void *p1, const void *p2)
48721495Sjmacd{
488146515Sru  char **line1 = (char **) p1;
489146515Sru  char **line2 = (char **) p2;
49021495Sjmacd  int i;
49121495Sjmacd
49221495Sjmacd  /* Compare using the first keyfield;
49321495Sjmacd     if that does not distinguish the lines, try the second keyfield;
49421495Sjmacd     and so on. */
49521495Sjmacd
49621495Sjmacd  for (i = 0; i < num_keyfields; i++)
49721495Sjmacd    {
49821495Sjmacd      long length1, length2;
49921495Sjmacd      char *start1 = find_field (&keyfields[i], *line1, &length1);
50021495Sjmacd      char *start2 = find_field (&keyfields[i], *line2, &length2);
501146515Sru      int tem = compare_field (&keyfields[i], start1, length1,
502146515Sru                               *line1 - text_base,
50342660Smarkm                               start2, length2, *line2 - text_base);
50421495Sjmacd      if (tem)
50542660Smarkm        {
50642660Smarkm          if (keyfields[i].reverse)
50742660Smarkm            return -tem;
50842660Smarkm          return tem;
50942660Smarkm        }
51021495Sjmacd    }
51121495Sjmacd
51242660Smarkm  return 0;                     /* Lines match exactly. */
51321495Sjmacd}
51421495Sjmacd
51521495Sjmacd/* Compare LINE1 and LINE2, described by structures
51621495Sjmacd   in which the first keyfield is identified in advance.
51721495Sjmacd   For positional sorting, assumes that the order of the lines in core
51821495Sjmacd   reflects their nominal order.  */
51921495Sjmacdint
520146515Srucompare_prepared (const void *p1, const void *p2)
52121495Sjmacd{
522146515Sru  struct lineinfo *line1 = (struct lineinfo *) p1;
523146515Sru  struct lineinfo *line2 = (struct lineinfo *) p2;
52421495Sjmacd  int i;
52521495Sjmacd  int tem;
52621495Sjmacd  char *text1, *text2;
52721495Sjmacd
52821495Sjmacd  /* Compare using the first keyfield, which has been found for us already. */
52921495Sjmacd  if (keyfields->positional)
53021495Sjmacd    {
53121495Sjmacd      if (line1->text - text_base > line2->text - text_base)
53242660Smarkm        tem = 1;
53321495Sjmacd      else
53442660Smarkm        tem = -1;
53521495Sjmacd    }
53621495Sjmacd  else if (keyfields->numeric)
53721495Sjmacd    tem = line1->key.number - line2->key.number;
53821495Sjmacd  else
53921495Sjmacd    tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
54042660Smarkm                         line2->key.text, line2->keylen, 0);
54121495Sjmacd  if (tem)
54221495Sjmacd    {
54321495Sjmacd      if (keyfields->reverse)
54442660Smarkm        return -tem;
54521495Sjmacd      return tem;
54621495Sjmacd    }
54721495Sjmacd
54821495Sjmacd  text1 = line1->text;
54921495Sjmacd  text2 = line2->text;
55021495Sjmacd
55121495Sjmacd  /* Compare using the second keyfield;
55221495Sjmacd     if that does not distinguish the lines, try the third keyfield;
55321495Sjmacd     and so on. */
55421495Sjmacd
55521495Sjmacd  for (i = 1; i < num_keyfields; i++)
55621495Sjmacd    {
55721495Sjmacd      long length1, length2;
55821495Sjmacd      char *start1 = find_field (&keyfields[i], text1, &length1);
55921495Sjmacd      char *start2 = find_field (&keyfields[i], text2, &length2);
560146515Sru      int tem = compare_field (&keyfields[i], start1, length1,
561146515Sru                               text1 - text_base,
56242660Smarkm                               start2, length2, text2 - text_base);
56321495Sjmacd      if (tem)
56442660Smarkm        {
56542660Smarkm          if (keyfields[i].reverse)
56642660Smarkm            return -tem;
56742660Smarkm          return tem;
56842660Smarkm        }
56921495Sjmacd    }
57021495Sjmacd
57142660Smarkm  return 0;                     /* Lines match exactly. */
57221495Sjmacd}
57321495Sjmacd
57421495Sjmacd/* Like compare_full but more general.
57521495Sjmacd   You can pass any strings, and you can say how many keyfields to use.
57621495Sjmacd   POS1 and POS2 should indicate the nominal positional ordering of
57721495Sjmacd   the two lines in the input.  */
57821495Sjmacd
57921495Sjmacdint
580146515Srucompare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
58121495Sjmacd{
58221495Sjmacd  int i;
58321495Sjmacd
58421495Sjmacd  /* Compare using the first keyfield;
58521495Sjmacd     if that does not distinguish the lines, try the second keyfield;
58621495Sjmacd     and so on. */
58721495Sjmacd
58821495Sjmacd  for (i = 0; i < use_keyfields; i++)
58921495Sjmacd    {
59021495Sjmacd      long length1, length2;
59121495Sjmacd      char *start1 = find_field (&keyfields[i], str1, &length1);
59221495Sjmacd      char *start2 = find_field (&keyfields[i], str2, &length2);
59321495Sjmacd      int tem = compare_field (&keyfields[i], start1, length1, pos1,
59442660Smarkm                               start2, length2, pos2);
59521495Sjmacd      if (tem)
59642660Smarkm        {
59742660Smarkm          if (keyfields[i].reverse)
59842660Smarkm            return -tem;
59942660Smarkm          return tem;
60042660Smarkm        }
60121495Sjmacd    }
60221495Sjmacd
60342660Smarkm  return 0;                     /* Lines match exactly. */
60421495Sjmacd}
60521495Sjmacd
60621495Sjmacd/* Find the start and length of a field in STR according to KEYFIELD.
60721495Sjmacd   A pointer to the starting character is returned, and the length
60821495Sjmacd   is stored into the int that LENGTHPTR points to.  */
60921495Sjmacd
61021495Sjmacdchar *
611146515Srufind_field (struct keyfield *keyfield, char *str, long int *lengthptr)
61221495Sjmacd{
61321495Sjmacd  char *start;
61421495Sjmacd  char *end;
61521495Sjmacd  char *(*fun) ();
61621495Sjmacd
61721495Sjmacd  if (keyfield->braced)
61821495Sjmacd    fun = find_braced_pos;
61921495Sjmacd  else
62021495Sjmacd    fun = find_pos;
62121495Sjmacd
62221495Sjmacd  start = (*fun) (str, keyfield->startwords, keyfield->startchars,
62342660Smarkm                  keyfield->ignore_blanks);
62421495Sjmacd  if (keyfield->endwords < 0)
62521495Sjmacd    {
62621495Sjmacd      if (keyfield->braced)
62742660Smarkm        end = find_braced_end (start);
62821495Sjmacd      else
62942660Smarkm        {
63042660Smarkm          end = start;
63142660Smarkm          while (*end && *end != '\n')
63242660Smarkm            end++;
63342660Smarkm        }
63421495Sjmacd    }
63521495Sjmacd  else
63621495Sjmacd    {
63721495Sjmacd      end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
63821495Sjmacd      if (end - str < start - str)
63942660Smarkm        end = start;
64021495Sjmacd    }
64121495Sjmacd  *lengthptr = end - start;
64221495Sjmacd  return start;
64321495Sjmacd}
64421495Sjmacd
64521495Sjmacd/* Return a pointer to a specified place within STR,
64621495Sjmacd   skipping (from the beginning) WORDS words and then CHARS chars.
64721495Sjmacd   If IGNORE_BLANKS is nonzero, we skip all blanks
64821495Sjmacd   after finding the specified word.  */
64921495Sjmacd
65021495Sjmacdchar *
651146515Srufind_pos (char *str, int words, int chars, int ignore_blanks)
65221495Sjmacd{
65321495Sjmacd  int i;
65421495Sjmacd  char *p = str;
65521495Sjmacd
65621495Sjmacd  for (i = 0; i < words; i++)
65721495Sjmacd    {
65821495Sjmacd      char c;
65921495Sjmacd      /* Find next bunch of nonblanks and skip them. */
66021495Sjmacd      while ((c = *p) == ' ' || c == '\t')
66142660Smarkm        p++;
66221495Sjmacd      while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
66342660Smarkm        p++;
66421495Sjmacd      if (!*p || *p == '\n')
66542660Smarkm        return p;
66621495Sjmacd    }
66721495Sjmacd
66821495Sjmacd  while (*p == ' ' || *p == '\t')
66921495Sjmacd    p++;
67021495Sjmacd
67121495Sjmacd  for (i = 0; i < chars; i++)
67221495Sjmacd    {
67321495Sjmacd      if (!*p || *p == '\n')
67442660Smarkm        break;
67521495Sjmacd      p++;
67621495Sjmacd    }
67721495Sjmacd  return p;
67821495Sjmacd}
67921495Sjmacd
68021495Sjmacd/* Like find_pos but assumes that each field is surrounded by braces
68121495Sjmacd   and that braces within fields are balanced. */
68221495Sjmacd
68321495Sjmacdchar *
684146515Srufind_braced_pos (char *str, int words, int chars, int ignore_blanks)
68521495Sjmacd{
68621495Sjmacd  int i;
68721495Sjmacd  int bracelevel;
68821495Sjmacd  char *p = str;
68921495Sjmacd  char c;
69021495Sjmacd
69121495Sjmacd  for (i = 0; i < words; i++)
69221495Sjmacd    {
69321495Sjmacd      bracelevel = 1;
69421495Sjmacd      while ((c = *p++) != '{' && c != '\n' && c)
69542660Smarkm        /* Do nothing. */ ;
69621495Sjmacd      if (c != '{')
69742660Smarkm        return p - 1;
69821495Sjmacd      while (bracelevel)
69942660Smarkm        {
70042660Smarkm          c = *p++;
70142660Smarkm          if (c == '{')
70242660Smarkm            bracelevel++;
70342660Smarkm          if (c == '}')
70442660Smarkm            bracelevel--;
70542660Smarkm          if (c == 0 || c == '\n')
70642660Smarkm            return p - 1;
70742660Smarkm        }
70821495Sjmacd    }
70921495Sjmacd
71021495Sjmacd  while ((c = *p++) != '{' && c != '\n' && c)
71121495Sjmacd    /* Do nothing. */ ;
71221495Sjmacd
71321495Sjmacd  if (c != '{')
71421495Sjmacd    return p - 1;
71521495Sjmacd
71621495Sjmacd  if (ignore_blanks)
71721495Sjmacd    while ((c = *p) == ' ' || c == '\t')
71821495Sjmacd      p++;
71921495Sjmacd
72021495Sjmacd  for (i = 0; i < chars; i++)
72121495Sjmacd    {
72221495Sjmacd      if (!*p || *p == '\n')
72342660Smarkm        break;
72421495Sjmacd      p++;
72521495Sjmacd    }
72621495Sjmacd  return p;
72721495Sjmacd}
72821495Sjmacd
72921495Sjmacd/* Find the end of the balanced-brace field which starts at STR.
73021495Sjmacd   The position returned is just before the closing brace. */
73121495Sjmacd
73221495Sjmacdchar *
733146515Srufind_braced_end (char *str)
73421495Sjmacd{
73521495Sjmacd  int bracelevel;
73621495Sjmacd  char *p = str;
73721495Sjmacd  char c;
73821495Sjmacd
73921495Sjmacd  bracelevel = 1;
74021495Sjmacd  while (bracelevel)
74121495Sjmacd    {
74221495Sjmacd      c = *p++;
74321495Sjmacd      if (c == '{')
74442660Smarkm        bracelevel++;
74521495Sjmacd      if (c == '}')
74642660Smarkm        bracelevel--;
74721495Sjmacd      if (c == 0 || c == '\n')
74842660Smarkm        return p - 1;
74921495Sjmacd    }
75021495Sjmacd  return p - 1;
75121495Sjmacd}
75221495Sjmacd
75321495Sjmacdlong
754146515Srufind_value (char *start, long int length)
75521495Sjmacd{
75621495Sjmacd  while (length != 0L)
75721495Sjmacd    {
75821495Sjmacd      if (isdigit (*start))
75942660Smarkm        return atol (start);
76021495Sjmacd      length--;
76121495Sjmacd      start++;
76221495Sjmacd    }
76321495Sjmacd  return 0l;
76421495Sjmacd}
76521495Sjmacd
76621495Sjmacd/* Vector used to translate characters for comparison.
76721495Sjmacd   This is how we make all alphanumerics follow all else,
76821495Sjmacd   and ignore case in the first sorting.  */
76921495Sjmacdint char_order[256];
77021495Sjmacd
77121495Sjmacdvoid
772146515Sruinit_char_order (void)
77321495Sjmacd{
77421495Sjmacd  int i;
77521495Sjmacd  for (i = 1; i < 256; i++)
77621495Sjmacd    char_order[i] = i;
77721495Sjmacd
77821495Sjmacd  for (i = '0'; i <= '9'; i++)
77921495Sjmacd    char_order[i] += 512;
78021495Sjmacd
78121495Sjmacd  for (i = 'a'; i <= 'z'; i++)
78221495Sjmacd    {
78321495Sjmacd      char_order[i] = 512 + i;
78421495Sjmacd      char_order[i + 'A' - 'a'] = 512 + i;
78521495Sjmacd    }
78621495Sjmacd}
78721495Sjmacd
78821495Sjmacd/* Compare two fields (each specified as a start pointer and a character count)
78921495Sjmacd   according to KEYFIELD.
79021495Sjmacd   The sign of the value reports the relation between the fields. */
79121495Sjmacd
79221495Sjmacdint
793146515Srucompare_field (struct keyfield *keyfield, char *start1, long int length1,
794146515Sru               long int pos1, char *start2, long int length2, long int pos2)
79521495Sjmacd{
79621495Sjmacd  if (keyfields->positional)
79721495Sjmacd    {
79821495Sjmacd      if (pos1 > pos2)
79942660Smarkm        return 1;
80021495Sjmacd      else
80142660Smarkm        return -1;
80221495Sjmacd    }
80321495Sjmacd  if (keyfield->numeric)
80421495Sjmacd    {
80521495Sjmacd      long value = find_value (start1, length1) - find_value (start2, length2);
80621495Sjmacd      if (value > 0)
80742660Smarkm        return 1;
80821495Sjmacd      if (value < 0)
80942660Smarkm        return -1;
81021495Sjmacd      return 0;
81121495Sjmacd    }
81221495Sjmacd  else
81321495Sjmacd    {
81421495Sjmacd      char *p1 = start1;
81521495Sjmacd      char *p2 = start2;
81621495Sjmacd      char *e1 = start1 + length1;
81721495Sjmacd      char *e2 = start2 + length2;
81821495Sjmacd
81921495Sjmacd      while (1)
82042660Smarkm        {
82142660Smarkm          int c1, c2;
82221495Sjmacd
82342660Smarkm          if (p1 == e1)
82442660Smarkm            c1 = 0;
82542660Smarkm          else
82642660Smarkm            c1 = *p1++;
82742660Smarkm          if (p2 == e2)
82842660Smarkm            c2 = 0;
82942660Smarkm          else
83042660Smarkm            c2 = *p2++;
83121495Sjmacd
83242660Smarkm          if (char_order[c1] != char_order[c2])
83342660Smarkm            return char_order[c1] - char_order[c2];
83442660Smarkm          if (!c1)
83542660Smarkm            break;
83642660Smarkm        }
83721495Sjmacd
83821495Sjmacd      /* Strings are equal except possibly for case.  */
83921495Sjmacd      p1 = start1;
84021495Sjmacd      p2 = start2;
84121495Sjmacd      while (1)
84242660Smarkm        {
84342660Smarkm          int c1, c2;
84421495Sjmacd
84542660Smarkm          if (p1 == e1)
84642660Smarkm            c1 = 0;
84742660Smarkm          else
84842660Smarkm            c1 = *p1++;
84942660Smarkm          if (p2 == e2)
85042660Smarkm            c2 = 0;
85142660Smarkm          else
85242660Smarkm            c2 = *p2++;
85321495Sjmacd
85442660Smarkm          if (c1 != c2)
85542660Smarkm            /* Reverse sign here so upper case comes out last.  */
85642660Smarkm            return c2 - c1;
85742660Smarkm          if (!c1)
85842660Smarkm            break;
85942660Smarkm        }
86021495Sjmacd
86121495Sjmacd      return 0;
86221495Sjmacd    }
86321495Sjmacd}
86421495Sjmacd
86521495Sjmacd/* A `struct linebuffer' is a structure which holds a line of text.
86621495Sjmacd   `readline' reads a line from a stream into a linebuffer
86721495Sjmacd   and works regardless of the length of the line.  */
86821495Sjmacd
86921495Sjmacdstruct linebuffer
87021495Sjmacd{
87121495Sjmacd  long size;
87221495Sjmacd  char *buffer;
87321495Sjmacd};
87421495Sjmacd
87521495Sjmacd/* Initialize LINEBUFFER for use. */
87621495Sjmacd
87721495Sjmacdvoid
878146515Sruinitbuffer (struct linebuffer *linebuffer)
87921495Sjmacd{
88021495Sjmacd  linebuffer->size = 200;
88121495Sjmacd  linebuffer->buffer = (char *) xmalloc (200);
88221495Sjmacd}
88321495Sjmacd
88421495Sjmacd/* Read a line of text from STREAM into LINEBUFFER.
88521495Sjmacd   Return the length of the line.  */
88621495Sjmacd
88721495Sjmacdlong
888146515Srureadline (struct linebuffer *linebuffer, FILE *stream)
88921495Sjmacd{
89021495Sjmacd  char *buffer = linebuffer->buffer;
89121495Sjmacd  char *p = linebuffer->buffer;
89221495Sjmacd  char *end = p + linebuffer->size;
89321495Sjmacd
89421495Sjmacd  while (1)
89521495Sjmacd    {
89621495Sjmacd      int c = getc (stream);
89721495Sjmacd      if (p == end)
89842660Smarkm        {
89942660Smarkm          buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
90042660Smarkm          p += buffer - linebuffer->buffer;
90142660Smarkm          end += buffer - linebuffer->buffer;
90242660Smarkm          linebuffer->buffer = buffer;
90342660Smarkm        }
90421495Sjmacd      if (c < 0 || c == '\n')
90542660Smarkm        {
90642660Smarkm          *p = 0;
90742660Smarkm          break;
90842660Smarkm        }
90921495Sjmacd      *p++ = c;
91021495Sjmacd    }
91121495Sjmacd
91221495Sjmacd  return p - buffer;
91321495Sjmacd}
91421495Sjmacd
91521495Sjmacd/* Sort an input file too big to sort in core.  */
91621495Sjmacd
91721495Sjmacdvoid
918146515Srusort_offline (char *infile, off_t total, char *outfile)
91921495Sjmacd{
92021495Sjmacd  /* More than enough. */
92121495Sjmacd  int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
92221495Sjmacd  char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
92321495Sjmacd  FILE *istream = fopen (infile, "r");
92421495Sjmacd  int i;
92521495Sjmacd  struct linebuffer lb;
92621495Sjmacd  long linelength;
92721495Sjmacd  int failure = 0;
92821495Sjmacd
92921495Sjmacd  initbuffer (&lb);
93021495Sjmacd
93121495Sjmacd  /* Read in one line of input data.  */
93221495Sjmacd
93321495Sjmacd  linelength = readline (&lb, istream);
93421495Sjmacd
93521495Sjmacd  if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
93621495Sjmacd    {
93742660Smarkm      error (_("%s: not a texinfo index file"), infile);
93821495Sjmacd      return;
93921495Sjmacd    }
94021495Sjmacd
94121495Sjmacd  /* Split up the input into `ntemps' temporary files, or maybe fewer,
94221495Sjmacd     and put the new files' names into `tempfiles' */
94321495Sjmacd
94421495Sjmacd  for (i = 0; i < ntemps; i++)
94521495Sjmacd    {
94621495Sjmacd      char *outname = maketempname (++tempcount);
94721495Sjmacd      FILE *ostream = fopen (outname, "w");
94821495Sjmacd      long tempsize = 0;
94921495Sjmacd
95021495Sjmacd      if (!ostream)
95142660Smarkm        pfatal_with_name (outname);
95221495Sjmacd      tempfiles[i] = outname;
95321495Sjmacd
95421495Sjmacd      /* Copy lines into this temp file as long as it does not make file
95542660Smarkm         "too big" or until there are no more lines.  */
95621495Sjmacd
95721495Sjmacd      while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
95842660Smarkm        {
95942660Smarkm          tempsize += linelength + 1;
96042660Smarkm          fputs (lb.buffer, ostream);
96142660Smarkm          putc ('\n', ostream);
96221495Sjmacd
96342660Smarkm          /* Read another line of input data.  */
96421495Sjmacd
96542660Smarkm          linelength = readline (&lb, istream);
96642660Smarkm          if (!linelength && feof (istream))
96742660Smarkm            break;
96821495Sjmacd
96942660Smarkm          if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
97042660Smarkm            {
97142660Smarkm              error (_("%s: not a texinfo index file"), infile);
97242660Smarkm              failure = 1;
97342660Smarkm              goto fail;
97442660Smarkm            }
97542660Smarkm        }
97621495Sjmacd      fclose (ostream);
97721495Sjmacd      if (feof (istream))
97842660Smarkm        break;
97921495Sjmacd    }
98021495Sjmacd
98121495Sjmacd  free (lb.buffer);
98221495Sjmacd
98321495Sjmacdfail:
98421495Sjmacd  /* Record number of temp files we actually needed.  */
98521495Sjmacd
98621495Sjmacd  ntemps = i;
98721495Sjmacd
98821495Sjmacd  /* Sort each tempfile into another tempfile.
98921495Sjmacd    Delete the first set of tempfiles and put the names of the second
99021495Sjmacd    into `tempfiles'. */
99121495Sjmacd
99221495Sjmacd  for (i = 0; i < ntemps; i++)
99321495Sjmacd    {
99421495Sjmacd      char *newtemp = maketempname (++tempcount);
995114472Sru      sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
99621495Sjmacd      if (!keep_tempfiles)
99742660Smarkm        unlink (tempfiles[i]);
99821495Sjmacd      tempfiles[i] = newtemp;
99921495Sjmacd    }
100021495Sjmacd
100121495Sjmacd  if (failure)
100221495Sjmacd    return;
100321495Sjmacd
100421495Sjmacd  /* Merge the tempfiles together and indexify. */
100521495Sjmacd
100621495Sjmacd  merge_files (tempfiles, ntemps, outfile);
100721495Sjmacd}
100821495Sjmacd
100921495Sjmacd/* Sort INFILE, whose size is TOTAL,
101021495Sjmacd   assuming that is small enough to be done in-core,
101121495Sjmacd   then indexify it and send the output to OUTFILE (or to stdout).  */
101221495Sjmacd
101321495Sjmacdvoid
1014146515Srusort_in_core (char *infile, int total, char *outfile)
101521495Sjmacd{
101621495Sjmacd  char **nextline;
101721495Sjmacd  char *data = (char *) xmalloc (total + 1);
101821495Sjmacd  char *file_data;
101921495Sjmacd  long file_size;
102021495Sjmacd  int i;
102121495Sjmacd  FILE *ostream = stdout;
102221495Sjmacd  struct lineinfo *lineinfo;
102321495Sjmacd
102421495Sjmacd  /* Read the contents of the file into the moby array `data'. */
102521495Sjmacd
102621495Sjmacd  int desc = open (infile, O_RDONLY, 0);
102721495Sjmacd
102821495Sjmacd  if (desc < 0)
102942660Smarkm    fatal (_("failure reopening %s"), infile);
103021495Sjmacd  for (file_size = 0;;)
103121495Sjmacd    {
103221495Sjmacd      i = read (desc, data + file_size, total - file_size);
103321495Sjmacd      if (i <= 0)
103442660Smarkm        break;
103521495Sjmacd      file_size += i;
103621495Sjmacd    }
103721495Sjmacd  file_data = data;
103821495Sjmacd  data[file_size] = 0;
103921495Sjmacd
104021495Sjmacd  close (desc);
104121495Sjmacd
104221495Sjmacd  if (file_size > 0 && data[0] != '\\' && data[0] != '@')
104321495Sjmacd    {
104442660Smarkm      error (_("%s: not a texinfo index file"), infile);
104521495Sjmacd      return;
104621495Sjmacd    }
104721495Sjmacd
104821495Sjmacd  init_char_order ();
104921495Sjmacd
105021495Sjmacd  /* Sort routines want to know this address. */
105121495Sjmacd
105221495Sjmacd  text_base = data;
105321495Sjmacd
105421495Sjmacd  /* Create the array of pointers to lines, with a default size
105521495Sjmacd     frequently enough.  */
105621495Sjmacd
105721495Sjmacd  nlines = total / 50;
105821495Sjmacd  if (!nlines)
105921495Sjmacd    nlines = 2;
106021495Sjmacd  linearray = (char **) xmalloc (nlines * sizeof (char *));
106121495Sjmacd
106221495Sjmacd  /* `nextline' points to the next free slot in this array.
106321495Sjmacd     `nlines' is the allocated size.  */
106421495Sjmacd
106521495Sjmacd  nextline = linearray;
106621495Sjmacd
106721495Sjmacd  /* Parse the input file's data, and make entries for the lines.  */
106821495Sjmacd
106921495Sjmacd  nextline = parsefile (infile, nextline, file_data, file_size);
107021495Sjmacd  if (nextline == 0)
107121495Sjmacd    {
107242660Smarkm      error (_("%s: not a texinfo index file"), infile);
107321495Sjmacd      return;
107421495Sjmacd    }
107521495Sjmacd
107621495Sjmacd  /* Sort the lines. */
107721495Sjmacd
107821495Sjmacd  /* If we have enough space, find the first keyfield of each line in advance.
107921495Sjmacd     Make a `struct lineinfo' for each line, which records the keyfield
108021495Sjmacd     as well as the line, and sort them.  */
108121495Sjmacd
1082146515Sru  lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
108321495Sjmacd
108421495Sjmacd  if (lineinfo)
108521495Sjmacd    {
108621495Sjmacd      struct lineinfo *lp;
108721495Sjmacd      char **p;
108821495Sjmacd
108921495Sjmacd      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
109042660Smarkm        {
109142660Smarkm          lp->text = *p;
109242660Smarkm          lp->key.text = find_field (keyfields, *p, &lp->keylen);
109342660Smarkm          if (keyfields->numeric)
109442660Smarkm            lp->key.number = find_value (lp->key.text, lp->keylen);
109542660Smarkm        }
109621495Sjmacd
109721495Sjmacd      qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
109821495Sjmacd             compare_prepared);
109921495Sjmacd
110021495Sjmacd      for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
110142660Smarkm        *p = lp->text;
110221495Sjmacd
110321495Sjmacd      free (lineinfo);
110421495Sjmacd    }
110521495Sjmacd  else
110621495Sjmacd    qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
110721495Sjmacd
110821495Sjmacd  /* Open the output file. */
110921495Sjmacd
111021495Sjmacd  if (outfile)
111121495Sjmacd    {
111221495Sjmacd      ostream = fopen (outfile, "w");
111321495Sjmacd      if (!ostream)
111442660Smarkm        pfatal_with_name (outfile);
111521495Sjmacd    }
111621495Sjmacd
111721495Sjmacd  writelines (linearray, nextline - linearray, ostream);
111821495Sjmacd  if (outfile)
111921495Sjmacd    fclose (ostream);
112021495Sjmacd
112121495Sjmacd  free (linearray);
112221495Sjmacd  free (data);
112321495Sjmacd}
112421495Sjmacd
112521495Sjmacd/* Parse an input string in core into lines.
112621495Sjmacd   DATA is the input string, and SIZE is its length.
112721495Sjmacd   Data goes in LINEARRAY starting at NEXTLINE.
112821495Sjmacd   The value returned is the first entry in LINEARRAY still unused.
112921495Sjmacd   Value 0 means input file contents are invalid.  */
113021495Sjmacd
113121495Sjmacdchar **
1132146515Sruparsefile (char *filename, char **nextline, char *data, long int size)
113321495Sjmacd{
113421495Sjmacd  char *p, *end;
113521495Sjmacd  char **line = nextline;
113621495Sjmacd
113721495Sjmacd  p = data;
113821495Sjmacd  end = p + size;
113921495Sjmacd  *end = 0;
114021495Sjmacd
114121495Sjmacd  while (p != end)
114221495Sjmacd    {
114321495Sjmacd      if (p[0] != '\\' && p[0] != '@')
114442660Smarkm        return 0;
114521495Sjmacd
114621495Sjmacd      *line = p;
1147114472Sru
1148114472Sru      /* Find the first letter of the first field of this line.  If it
1149114472Sru         is different from the first letter of the first field of the
1150114472Sru         first line, we need initial headers in the output index.  */
1151114472Sru      while (*p && *p != '{')
1152114472Sru        p++;
1153114472Sru      if (p == end)
1154114472Sru        return 0;
1155114472Sru      p++;
1156114472Sru      if (first_initial)
1157114472Sru        {
1158114472Sru          if (first_initial != toupper (*p))
1159114472Sru            need_initials = 1;
1160114472Sru        }
1161114472Sru      else
1162114472Sru        first_initial = toupper (*p);
1163116525Sru
116421495Sjmacd      while (*p && *p != '\n')
116542660Smarkm        p++;
116621495Sjmacd      if (p != end)
116742660Smarkm        p++;
116821495Sjmacd
116921495Sjmacd      line++;
117021495Sjmacd      if (line == linearray + nlines)
117142660Smarkm        {
117242660Smarkm          char **old = linearray;
1173146515Sru          linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
117442660Smarkm          line += linearray - old;
117542660Smarkm        }
117621495Sjmacd    }
117721495Sjmacd
117821495Sjmacd  return line;
117921495Sjmacd}
118021495Sjmacd
118121495Sjmacd/* Indexification is a filter applied to the sorted lines
118221495Sjmacd   as they are being written to the output file.
118321495Sjmacd   Multiple entries for the same name, with different page numbers,
118421495Sjmacd   get combined into a single entry with multiple page numbers.
118521495Sjmacd   The first braced field, which is used for sorting, is discarded.
118621495Sjmacd   However, its first character is examined, folded to lower case,
118721495Sjmacd   and if it is different from that in the previous line fed to us
118821495Sjmacd   a \initial line is written with one argument, the new initial.
118921495Sjmacd
119021495Sjmacd   If an entry has four braced fields, then the second and third
119121495Sjmacd   constitute primary and secondary names.
119221495Sjmacd   In this case, each change of primary name
119321495Sjmacd   generates a \primary line which contains only the primary name,
119421495Sjmacd   and in between these are \secondary lines which contain
119521495Sjmacd   just a secondary name and page numbers. */
119621495Sjmacd
119721495Sjmacd/* The last primary name we wrote a \primary entry for.
119821495Sjmacd   If only one level of indexing is being done, this is the last name seen. */
119921495Sjmacdchar *lastprimary;
120021495Sjmacd/* Length of storage allocated for lastprimary. */
120121495Sjmacdint lastprimarylength;
120221495Sjmacd
120321495Sjmacd/* Similar, for the secondary name. */
120421495Sjmacdchar *lastsecondary;
120521495Sjmacdint lastsecondarylength;
120621495Sjmacd
120721495Sjmacd/* Zero if we are not in the middle of writing an entry.
120821495Sjmacd   One if we have written the beginning of an entry but have not
120921495Sjmacd   yet written any page numbers into it.
121021495Sjmacd   Greater than one if we have written the beginning of an entry
121121495Sjmacd   plus at least one page number. */
121221495Sjmacdint pending;
121321495Sjmacd
121421495Sjmacd/* The initial (for sorting purposes) of the last primary entry written.
121521495Sjmacd   When this changes, a \initial {c} line is written */
121621495Sjmacd
121721495Sjmacdchar *lastinitial;
121821495Sjmacd
121921495Sjmacdint lastinitiallength;
122021495Sjmacd
122121495Sjmacd/* When we need a string of length 1 for the value of lastinitial,
122221495Sjmacd   store it here.  */
122321495Sjmacd
122421495Sjmacdchar lastinitial1[2];
122521495Sjmacd
122621495Sjmacd/* Initialize static storage for writing an index. */
122721495Sjmacd
122821495Sjmacdvoid
1229146515Sruinit_index (void)
123021495Sjmacd{
123121495Sjmacd  pending = 0;
123221495Sjmacd  lastinitial = lastinitial1;
123321495Sjmacd  lastinitial1[0] = 0;
123421495Sjmacd  lastinitial1[1] = 0;
123521495Sjmacd  lastinitiallength = 0;
123621495Sjmacd  lastprimarylength = 100;
123721495Sjmacd  lastprimary = (char *) xmalloc (lastprimarylength + 1);
123821495Sjmacd  memset (lastprimary, '\0', lastprimarylength + 1);
123921495Sjmacd  lastsecondarylength = 100;
124021495Sjmacd  lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
124121495Sjmacd  memset (lastsecondary, '\0', lastsecondarylength + 1);
124221495Sjmacd}
124321495Sjmacd
124421495Sjmacd/* Indexify.  Merge entries for the same name,
124521495Sjmacd   insert headers for each initial character, etc.  */
124621495Sjmacd
124721495Sjmacdvoid
1248146515Sruindexify (char *line, FILE *ostream)
124921495Sjmacd{
125021495Sjmacd  char *primary, *secondary, *pagenumber;
125121495Sjmacd  int primarylength, secondarylength = 0, pagelength;
125221495Sjmacd  int nosecondary;
125321495Sjmacd  int initiallength;
125421495Sjmacd  char *initial;
125521495Sjmacd  char initial1[2];
125621495Sjmacd  register char *p;
125721495Sjmacd
125821495Sjmacd  /* First, analyze the parts of the entry fed to us this time. */
125921495Sjmacd
126021495Sjmacd  p = find_braced_pos (line, 0, 0, 0);
126121495Sjmacd  if (*p == '{')
126221495Sjmacd    {
126321495Sjmacd      initial = p;
126421495Sjmacd      /* Get length of inner pair of braces starting at `p',
126542660Smarkm         including that inner pair of braces.  */
126621495Sjmacd      initiallength = find_braced_end (p + 1) + 1 - p;
126721495Sjmacd    }
126821495Sjmacd  else
126921495Sjmacd    {
127021495Sjmacd      initial = initial1;
1271114472Sru      initial1[0] = toupper (*p);
127221495Sjmacd      initial1[1] = 0;
127321495Sjmacd      initiallength = 1;
127421495Sjmacd    }
127521495Sjmacd
127621495Sjmacd  pagenumber = find_braced_pos (line, 1, 0, 0);
127721495Sjmacd  pagelength = find_braced_end (pagenumber) - pagenumber;
127821495Sjmacd  if (pagelength == 0)
127956160Sru    fatal (_("No page number in %s"), line);
128021495Sjmacd
128121495Sjmacd  primary = find_braced_pos (line, 2, 0, 0);
128221495Sjmacd  primarylength = find_braced_end (primary) - primary;
128321495Sjmacd
128421495Sjmacd  secondary = find_braced_pos (line, 3, 0, 0);
128521495Sjmacd  nosecondary = !*secondary;
128621495Sjmacd  if (!nosecondary)
128721495Sjmacd    secondarylength = find_braced_end (secondary) - secondary;
128821495Sjmacd
128921495Sjmacd  /* If the primary is different from before, make a new primary entry. */
129021495Sjmacd  if (strncmp (primary, lastprimary, primarylength))
129121495Sjmacd    {
129221495Sjmacd      /* Close off current secondary entry first, if one is open. */
129321495Sjmacd      if (pending)
129442660Smarkm        {
129542660Smarkm          fputs ("}\n", ostream);
129642660Smarkm          pending = 0;
129742660Smarkm        }
129821495Sjmacd
129921495Sjmacd      /* If this primary has a different initial, include an entry for
130042660Smarkm         the initial. */
1301114472Sru      if (need_initials &&
1302114472Sru          (initiallength != lastinitiallength ||
1303114472Sru           strncmp (initial, lastinitial, initiallength)))
130442660Smarkm        {
130542660Smarkm          fprintf (ostream, "\\initial {");
130642660Smarkm          fwrite (initial, 1, initiallength, ostream);
130742660Smarkm          fputs ("}\n", ostream);
130842660Smarkm          if (initial == initial1)
130942660Smarkm            {
131042660Smarkm              lastinitial = lastinitial1;
131142660Smarkm              *lastinitial1 = *initial1;
131242660Smarkm            }
131342660Smarkm          else
131442660Smarkm            {
131542660Smarkm              lastinitial = initial;
131642660Smarkm            }
131742660Smarkm          lastinitiallength = initiallength;
131842660Smarkm        }
131921495Sjmacd
132021495Sjmacd      /* Make the entry for the primary.  */
132121495Sjmacd      if (nosecondary)
132242660Smarkm        fputs ("\\entry {", ostream);
132321495Sjmacd      else
132442660Smarkm        fputs ("\\primary {", ostream);
132521495Sjmacd      fwrite (primary, primarylength, 1, ostream);
132621495Sjmacd      if (nosecondary)
132742660Smarkm        {
132842660Smarkm          fputs ("}{", ostream);
132942660Smarkm          pending = 1;
133042660Smarkm        }
133121495Sjmacd      else
133242660Smarkm        fputs ("}\n", ostream);
133321495Sjmacd
133421495Sjmacd      /* Record name of most recent primary. */
133521495Sjmacd      if (lastprimarylength < primarylength)
133642660Smarkm        {
133742660Smarkm          lastprimarylength = primarylength + 100;
133842660Smarkm          lastprimary = (char *) xrealloc (lastprimary,
133942660Smarkm                                           1 + lastprimarylength);
134042660Smarkm        }
134121495Sjmacd      strncpy (lastprimary, primary, primarylength);
134221495Sjmacd      lastprimary[primarylength] = 0;
134321495Sjmacd
134421495Sjmacd      /* There is no current secondary within this primary, now. */
134521495Sjmacd      lastsecondary[0] = 0;
134621495Sjmacd    }
134721495Sjmacd
1348146515Sru  /* Should not have an entry with no subtopic following one with a
1349146515Sru     subtopic. */
135021495Sjmacd
135121495Sjmacd  if (nosecondary && *lastsecondary)
135242660Smarkm    error (_("entry %s follows an entry with a secondary name"), line);
135321495Sjmacd
135421495Sjmacd  /* Start a new secondary entry if necessary. */
135521495Sjmacd  if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
135621495Sjmacd    {
135721495Sjmacd      if (pending)
135842660Smarkm        {
135942660Smarkm          fputs ("}\n", ostream);
136042660Smarkm          pending = 0;
136142660Smarkm        }
136221495Sjmacd
136321495Sjmacd      /* Write the entry for the secondary.  */
136421495Sjmacd      fputs ("\\secondary {", ostream);
136521495Sjmacd      fwrite (secondary, secondarylength, 1, ostream);
136621495Sjmacd      fputs ("}{", ostream);
136721495Sjmacd      pending = 1;
136821495Sjmacd
136921495Sjmacd      /* Record name of most recent secondary. */
137021495Sjmacd      if (lastsecondarylength < secondarylength)
137142660Smarkm        {
137242660Smarkm          lastsecondarylength = secondarylength + 100;
137342660Smarkm          lastsecondary = (char *) xrealloc (lastsecondary,
137442660Smarkm                                             1 + lastsecondarylength);
137542660Smarkm        }
137621495Sjmacd      strncpy (lastsecondary, secondary, secondarylength);
137721495Sjmacd      lastsecondary[secondarylength] = 0;
137821495Sjmacd    }
137921495Sjmacd
138021495Sjmacd  /* Here to add one more page number to the current entry. */
138121495Sjmacd  if (pending++ != 1)
1382146515Sru    fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
138321495Sjmacd  fwrite (pagenumber, pagelength, 1, ostream);
138421495Sjmacd}
138521495Sjmacd
138621495Sjmacd/* Close out any unfinished output entry. */
138721495Sjmacd
138821495Sjmacdvoid
1389146515Srufinish_index (FILE *ostream)
139021495Sjmacd{
139121495Sjmacd  if (pending)
139221495Sjmacd    fputs ("}\n", ostream);
139321495Sjmacd  free (lastprimary);
139421495Sjmacd  free (lastsecondary);
139521495Sjmacd}
139621495Sjmacd
139721495Sjmacd/* Copy the lines in the sorted order.
139821495Sjmacd   Each line is copied out of the input file it was found in. */
139921495Sjmacd
140021495Sjmacdvoid
1401146515Sruwritelines (char **linearray, int nlines, FILE *ostream)
140221495Sjmacd{
140321495Sjmacd  char **stop_line = linearray + nlines;
140421495Sjmacd  char **next_line;
140521495Sjmacd
140621495Sjmacd  init_index ();
140721495Sjmacd
140821495Sjmacd  /* Output the text of the lines, and free the buffer space. */
140921495Sjmacd
141021495Sjmacd  for (next_line = linearray; next_line != stop_line; next_line++)
141121495Sjmacd    {
1412146515Sru      /* If -u was specified, output the line only if distinct from
1413146515Sru         previous one.  */
141421495Sjmacd      if (next_line == linearray
141521495Sjmacd      /* Compare previous line with this one, using only the
141621495Sjmacd         explicitly specd keyfields. */
1417146515Sru          || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1418146515Sru                              num_keyfields - 1))
141942660Smarkm        {
142042660Smarkm          char *p = *next_line;
142142660Smarkm          char c;
142221495Sjmacd
142342660Smarkm          while ((c = *p++) && c != '\n')
142442660Smarkm            /* Do nothing. */ ;
142542660Smarkm          *(p - 1) = 0;
142642660Smarkm          indexify (*next_line, ostream);
142742660Smarkm        }
142821495Sjmacd    }
142921495Sjmacd
143021495Sjmacd  finish_index (ostream);
143121495Sjmacd}
143221495Sjmacd
143321495Sjmacd/* Assume (and optionally verify) that each input file is sorted;
143421495Sjmacd   merge them and output the result.
143521495Sjmacd   Returns nonzero if any input file fails to be sorted.
143621495Sjmacd
143721495Sjmacd   This is the high-level interface that can handle an unlimited
143821495Sjmacd   number of files.  */
143921495Sjmacd
144021495Sjmacd#define MAX_DIRECT_MERGE 10
144121495Sjmacd
144221495Sjmacdint
1443146515Srumerge_files (char **infiles, int nfiles, char *outfile)
144421495Sjmacd{
144521495Sjmacd  char **tempfiles;
144621495Sjmacd  int ntemps;
144721495Sjmacd  int i;
144821495Sjmacd  int value = 0;
144921495Sjmacd  int start_tempcount = tempcount;
145021495Sjmacd
145121495Sjmacd  if (nfiles <= MAX_DIRECT_MERGE)
145221495Sjmacd    return merge_direct (infiles, nfiles, outfile);
145321495Sjmacd
145421495Sjmacd  /* Merge groups of MAX_DIRECT_MERGE input files at a time,
145521495Sjmacd     making a temporary file to hold each group's result.  */
145621495Sjmacd
145721495Sjmacd  ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
145821495Sjmacd  tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
145921495Sjmacd  for (i = 0; i < ntemps; i++)
146021495Sjmacd    {
146121495Sjmacd      int nf = MAX_DIRECT_MERGE;
146221495Sjmacd      if (i + 1 == ntemps)
146342660Smarkm        nf = nfiles - i * MAX_DIRECT_MERGE;
146421495Sjmacd      tempfiles[i] = maketempname (++tempcount);
146521495Sjmacd      value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
146621495Sjmacd    }
146721495Sjmacd
146821495Sjmacd  /* All temporary files that existed before are no longer needed
146921495Sjmacd     since their contents have been merged into our new tempfiles.
147021495Sjmacd     So delete them.  */
147121495Sjmacd  flush_tempfiles (start_tempcount);
147221495Sjmacd
147321495Sjmacd  /* Now merge the temporary files we created.  */
147421495Sjmacd
147521495Sjmacd  merge_files (tempfiles, ntemps, outfile);
147621495Sjmacd
147721495Sjmacd  free (tempfiles);
147821495Sjmacd
147921495Sjmacd  return value;
148021495Sjmacd}
148121495Sjmacd
148221495Sjmacd/* Assume (and optionally verify) that each input file is sorted;
148321495Sjmacd   merge them and output the result.
148421495Sjmacd   Returns nonzero if any input file fails to be sorted.
148521495Sjmacd
148621495Sjmacd   This version of merging will not work if the number of
148721495Sjmacd   input files gets too high.  Higher level functions
148821495Sjmacd   use it only with a bounded number of input files.  */
148921495Sjmacd
149021495Sjmacdint
1491146515Srumerge_direct (char **infiles, int nfiles, char *outfile)
149221495Sjmacd{
149321495Sjmacd  struct linebuffer *lb1, *lb2;
149421495Sjmacd  struct linebuffer **thisline, **prevline;
149521495Sjmacd  FILE **streams;
149621495Sjmacd  int i;
149721495Sjmacd  int nleft;
149821495Sjmacd  int lossage = 0;
149921495Sjmacd  int *file_lossage;
150021495Sjmacd  struct linebuffer *prev_out = 0;
150121495Sjmacd  FILE *ostream = stdout;
150221495Sjmacd
150321495Sjmacd  if (outfile)
150421495Sjmacd    {
150521495Sjmacd      ostream = fopen (outfile, "w");
150621495Sjmacd    }
150721495Sjmacd  if (!ostream)
150821495Sjmacd    pfatal_with_name (outfile);
150921495Sjmacd
151021495Sjmacd  init_index ();
151121495Sjmacd
151221495Sjmacd  if (nfiles == 0)
151321495Sjmacd    {
151421495Sjmacd      if (outfile)
151542660Smarkm        fclose (ostream);
151621495Sjmacd      return 0;
151721495Sjmacd    }
151821495Sjmacd
1519146515Sru  /* For each file, make two line buffers.  Also, for each file, there
1520146515Sru     is an element of `thisline' which points at any time to one of the
1521146515Sru     file's two buffers, and an element of `prevline' which points to
1522146515Sru     the other buffer.  `thisline' is supposed to point to the next
1523146515Sru     available line from the file, while `prevline' holds the last file
1524146515Sru     line used, which is remembered so that we can verify that the file
1525146515Sru     is properly sorted. */
152621495Sjmacd
152721495Sjmacd  /* lb1 and lb2 contain one buffer each per file. */
152821495Sjmacd  lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
152921495Sjmacd  lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
153021495Sjmacd
1531146515Sru  /* thisline[i] points to the linebuffer holding the next available
1532146515Sru     line in file i, or is zero if there are no lines left in that file.  */
153321495Sjmacd  thisline = (struct linebuffer **)
153421495Sjmacd    xmalloc (nfiles * sizeof (struct linebuffer *));
153521495Sjmacd  /* prevline[i] points to the linebuffer holding the last used line
153621495Sjmacd     from file i.  This is just for verifying that file i is properly
153721495Sjmacd     sorted.  */
153821495Sjmacd  prevline = (struct linebuffer **)
153921495Sjmacd    xmalloc (nfiles * sizeof (struct linebuffer *));
154021495Sjmacd  /* streams[i] holds the input stream for file i.  */
154121495Sjmacd  streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
154221495Sjmacd  /* file_lossage[i] is nonzero if we already know file i is not
154321495Sjmacd     properly sorted.  */
154421495Sjmacd  file_lossage = (int *) xmalloc (nfiles * sizeof (int));
154521495Sjmacd
154621495Sjmacd  /* Allocate and initialize all that storage. */
154721495Sjmacd
154821495Sjmacd  for (i = 0; i < nfiles; i++)
154921495Sjmacd    {
155021495Sjmacd      initbuffer (&lb1[i]);
155121495Sjmacd      initbuffer (&lb2[i]);
155221495Sjmacd      thisline[i] = &lb1[i];
155321495Sjmacd      prevline[i] = &lb2[i];
155421495Sjmacd      file_lossage[i] = 0;
155521495Sjmacd      streams[i] = fopen (infiles[i], "r");
155621495Sjmacd      if (!streams[i])
155742660Smarkm        pfatal_with_name (infiles[i]);
155821495Sjmacd
155921495Sjmacd      readline (thisline[i], streams[i]);
156021495Sjmacd    }
156121495Sjmacd
156221495Sjmacd  /* Keep count of number of files not at eof. */
156321495Sjmacd  nleft = nfiles;
156421495Sjmacd
156521495Sjmacd  while (nleft)
156621495Sjmacd    {
156721495Sjmacd      struct linebuffer *best = 0;
156821495Sjmacd      struct linebuffer *exch;
156921495Sjmacd      int bestfile = -1;
157021495Sjmacd      int i;
157121495Sjmacd
157221495Sjmacd      /* Look at the next avail line of each file; choose the least one.  */
157321495Sjmacd
157421495Sjmacd      for (i = 0; i < nfiles; i++)
157542660Smarkm        {
157642660Smarkm          if (thisline[i] &&
157742660Smarkm              (!best ||
157842660Smarkm               0 < compare_general (best->buffer, thisline[i]->buffer,
157942660Smarkm                                 (long) bestfile, (long) i, num_keyfields)))
158042660Smarkm            {
158142660Smarkm              best = thisline[i];
158242660Smarkm              bestfile = i;
158342660Smarkm            }
158442660Smarkm        }
158521495Sjmacd
158621495Sjmacd      /* Output that line, unless it matches the previous one and we
158742660Smarkm         don't want duplicates. */
158821495Sjmacd
158921495Sjmacd      if (!(prev_out &&
159042660Smarkm            !compare_general (prev_out->buffer,
159142660Smarkm                              best->buffer, 0L, 1L, num_keyfields - 1)))
159242660Smarkm        indexify (best->buffer, ostream);
159321495Sjmacd      prev_out = best;
159421495Sjmacd
159521495Sjmacd      /* Now make the line the previous of its file, and fetch a new
159642660Smarkm         line from that file.  */
159721495Sjmacd
159821495Sjmacd      exch = prevline[bestfile];
159921495Sjmacd      prevline[bestfile] = thisline[bestfile];
160021495Sjmacd      thisline[bestfile] = exch;
160121495Sjmacd
160221495Sjmacd      while (1)
160342660Smarkm        {
160442660Smarkm          /* If the file has no more, mark it empty. */
160521495Sjmacd
160642660Smarkm          if (feof (streams[bestfile]))
160742660Smarkm            {
160842660Smarkm              thisline[bestfile] = 0;
160942660Smarkm              /* Update the number of files still not empty. */
161042660Smarkm              nleft--;
161142660Smarkm              break;
161242660Smarkm            }
161342660Smarkm          readline (thisline[bestfile], streams[bestfile]);
161442660Smarkm          if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
161542660Smarkm            break;
161642660Smarkm        }
161721495Sjmacd    }
161821495Sjmacd
161921495Sjmacd  finish_index (ostream);
162021495Sjmacd
162121495Sjmacd  /* Free all storage and close all input streams. */
162221495Sjmacd
162321495Sjmacd  for (i = 0; i < nfiles; i++)
162421495Sjmacd    {
162521495Sjmacd      fclose (streams[i]);
162621495Sjmacd      free (lb1[i].buffer);
162721495Sjmacd      free (lb2[i].buffer);
162821495Sjmacd    }
162921495Sjmacd  free (file_lossage);
163021495Sjmacd  free (lb1);
163121495Sjmacd  free (lb2);
163221495Sjmacd  free (thisline);
163321495Sjmacd  free (prevline);
163421495Sjmacd  free (streams);
163521495Sjmacd
163621495Sjmacd  if (outfile)
163721495Sjmacd    fclose (ostream);
163821495Sjmacd
163921495Sjmacd  return lossage;
164021495Sjmacd}
164121495Sjmacd
164221495Sjmacd/* Print error message and exit.  */
164321495Sjmacd
164421495Sjmacdvoid
1645146515Srufatal (const char *format, const char *arg)
164621495Sjmacd{
164721495Sjmacd  error (format, arg);
164856160Sru  xexit (1);
164921495Sjmacd}
165021495Sjmacd
165121495Sjmacd/* Print error message.  FORMAT is printf control string, ARG is arg for it. */
165221495Sjmacdvoid
1653146515Sruerror (const char *format, const char *arg)
165421495Sjmacd{
165521495Sjmacd  printf ("%s: ", program_name);
165621495Sjmacd  printf (format, arg);
165721495Sjmacd  if (format[strlen (format) -1] != '\n')
165821495Sjmacd    printf ("\n");
165921495Sjmacd}
166021495Sjmacd
166121495Sjmacdvoid
1662146515Sruperror_with_name (const char *name)
166321495Sjmacd{
1664116525Sru  fprintf (stderr, "%s: ", program_name);
1665116525Sru  perror (name);
166621495Sjmacd}
166721495Sjmacd
166821495Sjmacdvoid
1669146515Srupfatal_with_name (const char *name)
167021495Sjmacd{
1671116525Sru  perror_with_name (name);
167256160Sru  xexit (1);
167321495Sjmacd}
167421495Sjmacd
1675116525Sru
1676116525Sru/* Return a newly-allocated string concatenating S1 and S2.  */
167721495Sjmacd
167821495Sjmacdchar *
1679146515Sruconcat (char *s1, char *s2)
168021495Sjmacd{
1681116525Sru  int len1 = strlen (s1), len2 = strlen (s2);
1682116525Sru  char *result = (char *) xmalloc (len1 + len2 + 1);
168321495Sjmacd
168421495Sjmacd  strcpy (result, s1);
168521495Sjmacd  strcpy (result + len1, s2);
1686116525Sru  *(result + len1 + len2) = 0;
168721495Sjmacd
168821495Sjmacd  return result;
168921495Sjmacd}
1690