132785Speter/* File I/O for GNU DIFF.
232785Speter   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
332785Speter
432785SpeterThis file is part of GNU DIFF.
532785Speter
632785SpeterGNU DIFF is free software; you can redistribute it and/or modify
732785Speterit under the terms of the GNU General Public License as published by
832785Speterthe Free Software Foundation; either version 2, or (at your option)
932785Speterany later version.
1032785Speter
1132785SpeterGNU DIFF is distributed in the hope that it will be useful,
1232785Speterbut WITHOUT ANY WARRANTY; without even the implied warranty of
1332785SpeterMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1432785SpeterGNU General Public License for more details.
1532785Speter
1654427Speter*/
1732785Speter
1832785Speter#include "diff.h"
1932785Speter
2032785Speter/* Rotate a value n bits to the left. */
2132785Speter#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
2232785Speter#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
2332785Speter
2432785Speter/* Given a hash value and a new character, return a new hash value. */
2532785Speter#define HASH(h, c) ((c) + ROL (h, 7))
2632785Speter
2732785Speter/* Guess remaining number of lines from number N of lines so far,
2832785Speter   size S so far, and total size T.  */
2932785Speter#define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
3032785Speter
3132785Speter/* Type used for fast prefix comparison in find_identical_ends.  */
3232785Speter#ifndef word
3332785Speter#define word int
3432785Speter#endif
3532785Speter
3632785Speter/* Lines are put into equivalence classes (of lines that match in line_cmp).
3732785Speter   Each equivalence class is represented by one of these structures,
3832785Speter   but only while the classes are being computed.
3932785Speter   Afterward, each class is represented by a number.  */
4032785Speterstruct equivclass
4132785Speter{
4232785Speter  int next;	/* Next item in this bucket. */
4332785Speter  unsigned hash;	/* Hash of lines in this class.  */
4432785Speter  char const *line;	/* A line that fits this class. */
4532785Speter  size_t length;	/* That line's length, not counting its newline.  */
4632785Speter};
4732785Speter
4832785Speter/* Hash-table: array of buckets, each being a chain of equivalence classes.
4932785Speter   buckets[-1] is reserved for incomplete lines.  */
5032785Speterstatic int *buckets;
5132785Speter
5232785Speter/* Number of buckets in the hash table array, not counting buckets[-1]. */
5332785Speterstatic int nbuckets;
5432785Speter
5532785Speter/* Array in which the equivalence classes are allocated.
5632785Speter   The bucket-chains go through the elements in this array.
5732785Speter   The number of an equivalence class is its index in this array.  */
5832785Speterstatic struct equivclass *equivs;
5932785Speter
6032785Speter/* Index of first free element in the array `equivs'.  */
6132785Speterstatic int equivs_index;
6232785Speter
6332785Speter/* Number of elements allocated in the array `equivs'.  */
6432785Speterstatic int equivs_alloc;
6532785Speter
6632785Speterstatic void find_and_hash_each_line PARAMS((struct file_data *));
6732785Speterstatic void find_identical_ends PARAMS((struct file_data[]));
6832785Speterstatic void prepare_text_end PARAMS((struct file_data *));
6932785Speter
7032785Speter/* Check for binary files and compare them for exact identity.  */
7132785Speter
7232785Speter/* Return 1 if BUF contains a non text character.
7332785Speter   SIZE is the number of characters in BUF.  */
7432785Speter
7532785Speter#define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0)
7632785Speter
7732785Speter/* Get ready to read the current file.
7832785Speter   Return nonzero if SKIP_TEST is zero,
7932785Speter   and if it appears to be a binary file.  */
8032785Speter
8132785Speterint
8232785Spetersip (current, skip_test)
8332785Speter     struct file_data *current;
8432785Speter     int skip_test;
8532785Speter{
8632785Speter  /* If we have a nonexistent file at this stage, treat it as empty.  */
8732785Speter  if (current->desc < 0)
8832785Speter    {
8932785Speter      /* Leave room for a sentinel.  */
9032785Speter      current->bufsize = sizeof (word);
9132785Speter      current->buffer = xmalloc (current->bufsize);
9232785Speter    }
9332785Speter  else
9432785Speter    {
9532785Speter      current->bufsize = STAT_BLOCKSIZE (current->stat);
9632785Speter      current->buffer = xmalloc (current->bufsize);
9732785Speter
9832785Speter      if (! skip_test)
9932785Speter	{
10032785Speter	  /* Check first part of file to see if it's a binary file.  */
10132785Speter#if HAVE_SETMODE
10232785Speter	  int oldmode = setmode (current->desc, O_BINARY);
10332785Speter#endif
104102840Speter	  ssize_t n = read (current->desc, current->buffer, current->bufsize);
10532785Speter	  if (n == -1)
10632785Speter	    pfatal_with_name (current->name);
10732785Speter	  current->buffered_chars = n;
10832785Speter#if HAVE_SETMODE
10932785Speter	  if (oldmode != O_BINARY)
11032785Speter	    {
11132785Speter	      if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1)
11232785Speter		pfatal_with_name (current->name);
11332785Speter	      setmode (current->desc, oldmode);
11432785Speter	      current->buffered_chars = 0;
11532785Speter	    }
11632785Speter#endif
11732785Speter	  return binary_file_p (current->buffer, n);
11832785Speter	}
11932785Speter    }
12032785Speter
12132785Speter  current->buffered_chars = 0;
12232785Speter  return 0;
12332785Speter}
12432785Speter
12532785Speter/* Slurp the rest of the current file completely into memory.  */
12632785Speter
12732785Spetervoid
12832785Speterslurp (current)
12932785Speter     struct file_data *current;
13032785Speter{
131102840Speter  ssize_t cc;
13232785Speter
13332785Speter  if (current->desc < 0)
13432785Speter    /* The file is nonexistent.  */
13532785Speter    ;
13632785Speter  else if (S_ISREG (current->stat.st_mode))
13732785Speter    {
13832785Speter      /* It's a regular file; slurp in the rest all at once.  */
13932785Speter
14032785Speter      /* Get the size out of the stat block.
14132785Speter	 Allocate enough room for appended newline and sentinel.  */
14232785Speter      cc = current->stat.st_size + 1 + sizeof (word);
14332785Speter      if (current->bufsize < cc)
14432785Speter	{
14532785Speter	  current->bufsize = cc;
14632785Speter	  current->buffer = xrealloc (current->buffer, cc);
14732785Speter	}
14832785Speter
14932785Speter      if (current->buffered_chars < current->stat.st_size)
15032785Speter	{
15132785Speter	  cc = read (current->desc,
15232785Speter		     current->buffer + current->buffered_chars,
15332785Speter		     current->stat.st_size - current->buffered_chars);
15432785Speter	  if (cc == -1)
15532785Speter	    pfatal_with_name (current->name);
15632785Speter	  current->buffered_chars += cc;
15732785Speter	}
15832785Speter    }
15932785Speter  /* It's not a regular file; read it, growing the buffer as needed.  */
16032785Speter  else if (always_text_flag || current->buffered_chars != 0)
16132785Speter    {
16232785Speter      for (;;)
16332785Speter	{
16432785Speter	  if (current->buffered_chars == current->bufsize)
16532785Speter	    {
16632785Speter	      current->bufsize = current->bufsize * 2;
16732785Speter	      current->buffer = xrealloc (current->buffer, current->bufsize);
16832785Speter	    }
16932785Speter	  cc = read (current->desc,
17032785Speter		     current->buffer + current->buffered_chars,
17132785Speter		     current->bufsize - current->buffered_chars);
17232785Speter	  if (cc == 0)
17332785Speter	    break;
17432785Speter	  if (cc == -1)
17532785Speter	    pfatal_with_name (current->name);
17632785Speter	  current->buffered_chars += cc;
17732785Speter	}
17832785Speter      /* Allocate just enough room for appended newline and sentinel.  */
17932785Speter      current->bufsize = current->buffered_chars + 1 + sizeof (word);
18032785Speter      current->buffer = xrealloc (current->buffer, current->bufsize);
18132785Speter    }
18232785Speter}
18332785Speter
18432785Speter/* Split the file into lines, simultaneously computing the equivalence class for
18532785Speter   each line. */
18632785Speter
18732785Speterstatic void
18832785Speterfind_and_hash_each_line (current)
18932785Speter     struct file_data *current;
19032785Speter{
19132785Speter  unsigned h;
19232785Speter  unsigned char const *p = (unsigned char const *) current->prefix_end;
19332785Speter  unsigned char c;
19432785Speter  int i, *bucket;
19532785Speter  size_t length;
19632785Speter
19732785Speter  /* Cache often-used quantities in local variables to help the compiler.  */
19832785Speter  char const **linbuf = current->linbuf;
19932785Speter  int alloc_lines = current->alloc_lines;
20032785Speter  int line = 0;
20132785Speter  int linbuf_base = current->linbuf_base;
20232785Speter  int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
20332785Speter  struct equivclass *eqs = equivs;
20432785Speter  int eqs_index = equivs_index;
20532785Speter  int eqs_alloc = equivs_alloc;
20632785Speter  char const *suffix_begin = current->suffix_begin;
20732785Speter  char const *bufend = current->buffer + current->buffered_chars;
20832785Speter  int use_line_cmp = ignore_some_line_changes;
20932785Speter
21032785Speter  while ((char const *) p < suffix_begin)
21132785Speter    {
21232785Speter      char const *ip = (char const *) p;
21332785Speter
21432785Speter      /* Compute the equivalence class for this line.  */
21532785Speter
21632785Speter      h = 0;
21732785Speter
21832785Speter      /* Hash this line until we find a newline. */
21932785Speter      if (ignore_case_flag)
22032785Speter	{
22132785Speter	  if (ignore_all_space_flag)
22232785Speter	    while ((c = *p++) != '\n')
22332785Speter	      {
22432785Speter		if (! ISSPACE (c))
22532785Speter		  h = HASH (h, ISUPPER (c) ? tolower (c) : c);
22632785Speter	      }
22732785Speter	  else if (ignore_space_change_flag)
22832785Speter	    while ((c = *p++) != '\n')
22932785Speter	      {
23032785Speter		if (ISSPACE (c))
23132785Speter		  {
23232785Speter		    for (;;)
23332785Speter		      {
23432785Speter			c = *p++;
23532785Speter			if (!ISSPACE (c))
23632785Speter			  break;
23732785Speter			if (c == '\n')
23832785Speter			  goto hashing_done;
23932785Speter		      }
24032785Speter		    h = HASH (h, ' ');
24132785Speter		  }
24232785Speter		/* C is now the first non-space.  */
24332785Speter		h = HASH (h, ISUPPER (c) ? tolower (c) : c);
24432785Speter	      }
24532785Speter	  else
24632785Speter	    while ((c = *p++) != '\n')
24732785Speter	      h = HASH (h, ISUPPER (c) ? tolower (c) : c);
24832785Speter	}
24932785Speter      else
25032785Speter	{
25132785Speter	  if (ignore_all_space_flag)
25232785Speter	    while ((c = *p++) != '\n')
25332785Speter	      {
25432785Speter		if (! ISSPACE (c))
25532785Speter		  h = HASH (h, c);
25632785Speter	      }
25732785Speter	  else if (ignore_space_change_flag)
25832785Speter	    while ((c = *p++) != '\n')
25932785Speter	      {
26032785Speter		if (ISSPACE (c))
26132785Speter		  {
26232785Speter		    for (;;)
26332785Speter		      {
26432785Speter			c = *p++;
26532785Speter			if (!ISSPACE (c))
26632785Speter			  break;
26732785Speter			if (c == '\n')
26832785Speter			  goto hashing_done;
26932785Speter		      }
27032785Speter		    h = HASH (h, ' ');
27132785Speter		  }
27232785Speter		/* C is now the first non-space.  */
27332785Speter		h = HASH (h, c);
27432785Speter	      }
27532785Speter	  else
27632785Speter	    while ((c = *p++) != '\n')
27732785Speter	      h = HASH (h, c);
27832785Speter	}
27932785Speter   hashing_done:;
28032785Speter
28132785Speter      bucket = &buckets[h % nbuckets];
28232785Speter      length = (char const *) p - ip - 1;
28332785Speter
28432785Speter      if ((char const *) p == bufend
28532785Speter	  && current->missing_newline
28632785Speter	  && ROBUST_OUTPUT_STYLE (output_style))
28732785Speter	{
28832785Speter	  /* This line is incomplete.  If this is significant,
28932785Speter	     put the line into bucket[-1].  */
29032785Speter	  if (! (ignore_space_change_flag | ignore_all_space_flag))
29132785Speter	    bucket = &buckets[-1];
29232785Speter
29332785Speter	  /* Omit the inserted newline when computing linbuf later.  */
29432785Speter	  p--;
29532785Speter	  bufend = suffix_begin = (char const *) p;
29632785Speter	}
29732785Speter
29832785Speter      for (i = *bucket;  ;  i = eqs[i].next)
29932785Speter	if (!i)
30032785Speter	  {
30132785Speter	    /* Create a new equivalence class in this bucket. */
30232785Speter	    i = eqs_index++;
30332785Speter	    if (i == eqs_alloc)
30432785Speter	      eqs = (struct equivclass *)
30532785Speter		      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
30632785Speter	    eqs[i].next = *bucket;
30732785Speter	    eqs[i].hash = h;
30832785Speter	    eqs[i].line = ip;
30932785Speter	    eqs[i].length = length;
31032785Speter	    *bucket = i;
31132785Speter	    break;
31232785Speter	  }
31332785Speter	else if (eqs[i].hash == h)
31432785Speter	  {
31532785Speter	    char const *eqline = eqs[i].line;
31632785Speter
31732785Speter	    /* Reuse existing equivalence class if the lines are identical.
31832785Speter	       This detects the common case of exact identity
31932785Speter	       faster than complete comparison would.  */
32032785Speter	    if (eqs[i].length == length && memcmp (eqline, ip, length) == 0)
32132785Speter	      break;
32232785Speter
32332785Speter	    /* Reuse existing class if line_cmp reports the lines equal.  */
32432785Speter	    if (use_line_cmp && line_cmp (eqline, ip) == 0)
32532785Speter	      break;
32632785Speter	  }
32732785Speter
32832785Speter      /* Maybe increase the size of the line table. */
32932785Speter      if (line == alloc_lines)
33032785Speter	{
33132785Speter	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
33232785Speter	  alloc_lines = 2 * alloc_lines - linbuf_base;
33332785Speter	  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
33432785Speter	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
33532785Speter					     (alloc_lines - linbuf_base)
33632785Speter					     * sizeof (*linbuf))
33732785Speter		   - linbuf_base;
33832785Speter	}
33932785Speter      linbuf[line] = ip;
34032785Speter      cureqs[line] = i;
34132785Speter      ++line;
34232785Speter    }
34332785Speter
34432785Speter  current->buffered_lines = line;
34532785Speter
34632785Speter  for (i = 0;  ;  i++)
34732785Speter    {
34832785Speter      /* Record the line start for lines in the suffix that we care about.
34932785Speter	 Record one more line start than lines,
35032785Speter	 so that we can compute the length of any buffered line.  */
35132785Speter      if (line == alloc_lines)
35232785Speter	{
35332785Speter	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
35432785Speter	  alloc_lines = 2 * alloc_lines - linbuf_base;
35532785Speter	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
35632785Speter					     (alloc_lines - linbuf_base)
35732785Speter					     * sizeof (*linbuf))
35832785Speter		   - linbuf_base;
35932785Speter	}
36032785Speter      linbuf[line] = (char const *) p;
36132785Speter
36232785Speter      if ((char const *) p == bufend)
36332785Speter	break;
36432785Speter
36532785Speter      if (context <= i && no_diff_means_no_output)
36632785Speter	break;
36732785Speter
36832785Speter      line++;
36932785Speter
37032785Speter      while (*p++ != '\n')
37132785Speter	;
37232785Speter    }
37332785Speter
37432785Speter  /* Done with cache in local variables.  */
37532785Speter  current->linbuf = linbuf;
37632785Speter  current->valid_lines = line;
37732785Speter  current->alloc_lines = alloc_lines;
37832785Speter  current->equivs = cureqs;
37932785Speter  equivs = eqs;
38032785Speter  equivs_alloc = eqs_alloc;
38132785Speter  equivs_index = eqs_index;
38232785Speter}
38332785Speter
38432785Speter/* Prepare the end of the text.  Make sure it's initialized.
38532785Speter   Make sure text ends in a newline,
38632785Speter   but remember that we had to add one.  */
38732785Speter
38832785Speterstatic void
38932785Speterprepare_text_end (current)
39032785Speter     struct file_data *current;
39132785Speter{
39232785Speter  size_t buffered_chars = current->buffered_chars;
39332785Speter  char *p = current->buffer;
39432785Speter
39532785Speter  if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
39632785Speter    current->missing_newline = 0;
39732785Speter  else
39832785Speter    {
39932785Speter      p[buffered_chars++] = '\n';
40032785Speter      current->buffered_chars = buffered_chars;
40132785Speter      current->missing_newline = 1;
40232785Speter    }
40332785Speter
40432785Speter  /* Don't use uninitialized storage when planting or using sentinels.  */
40532785Speter  if (p)
40632785Speter    bzero (p + buffered_chars, sizeof (word));
40732785Speter}
40832785Speter
40932785Speter/* Given a vector of two file_data objects, find the identical
41032785Speter   prefixes and suffixes of each object. */
41132785Speter
41232785Speterstatic void
41332785Speterfind_identical_ends (filevec)
41432785Speter     struct file_data filevec[];
41532785Speter{
41632785Speter  word *w0, *w1;
41732785Speter  char *p0, *p1, *buffer0, *buffer1;
41832785Speter  char const *end0, *beg0;
41932785Speter  char const **linbuf0, **linbuf1;
42032785Speter  int i, lines;
42132785Speter  size_t n0, n1, tem;
42232785Speter  int alloc_lines0, alloc_lines1;
42332785Speter  int buffered_prefix, prefix_count, prefix_mask;
42432785Speter
42532785Speter  slurp (&filevec[0]);
42632785Speter  if (filevec[0].desc != filevec[1].desc)
42732785Speter    slurp (&filevec[1]);
42832785Speter  else
42932785Speter    {
43032785Speter      filevec[1].buffer = filevec[0].buffer;
43132785Speter      filevec[1].bufsize = filevec[0].bufsize;
43232785Speter      filevec[1].buffered_chars = filevec[0].buffered_chars;
43332785Speter    }
43432785Speter  for (i = 0; i < 2; i++)
43532785Speter    prepare_text_end (&filevec[i]);
43632785Speter
43732785Speter  /* Find identical prefix.  */
43832785Speter
43932785Speter  p0 = buffer0 = filevec[0].buffer;
44032785Speter  p1 = buffer1 = filevec[1].buffer;
44132785Speter
44232785Speter  n0 = filevec[0].buffered_chars;
44332785Speter  n1 = filevec[1].buffered_chars;
44432785Speter
44532785Speter  if (p0 == p1)
44632785Speter    /* The buffers are the same; sentinels won't work.  */
44732785Speter    p0 = p1 += n1;
44832785Speter  else
44932785Speter    {
45032785Speter      /* Insert end sentinels, in this case characters that are guaranteed
45132785Speter	 to make the equality test false, and thus terminate the loop.  */
45232785Speter
45332785Speter      if (n0 < n1)
45432785Speter	p0[n0] = ~p1[n0];
45532785Speter      else
45632785Speter	p1[n1] = ~p0[n1];
45732785Speter
45832785Speter      /* Loop until first mismatch, or to the sentinel characters.  */
45932785Speter
46032785Speter      /* Compare a word at a time for speed.  */
46132785Speter      w0 = (word *) p0;
46232785Speter      w1 = (word *) p1;
46332785Speter      while (*w0++ == *w1++)
46432785Speter	;
46532785Speter      --w0, --w1;
46632785Speter
46732785Speter      /* Do the last few bytes of comparison a byte at a time.  */
46832785Speter      p0 = (char *) w0;
46932785Speter      p1 = (char *) w1;
47032785Speter      while (*p0++ == *p1++)
47132785Speter	;
47232785Speter      --p0, --p1;
47332785Speter
47432785Speter      /* Don't mistakenly count missing newline as part of prefix. */
47532785Speter      if (ROBUST_OUTPUT_STYLE (output_style)
47632785Speter	  && (buffer0 + n0 - filevec[0].missing_newline < p0)
47732785Speter	     !=
47832785Speter	     (buffer1 + n1 - filevec[1].missing_newline < p1))
47932785Speter	--p0, --p1;
48032785Speter    }
48132785Speter
48232785Speter  /* Now P0 and P1 point at the first nonmatching characters.  */
48332785Speter
48432785Speter  /* Skip back to last line-beginning in the prefix,
48532785Speter     and then discard up to HORIZON_LINES lines from the prefix.  */
48632785Speter  i = horizon_lines;
48732785Speter  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
48832785Speter    --p0, --p1;
48932785Speter
49032785Speter  /* Record the prefix.  */
49132785Speter  filevec[0].prefix_end = p0;
49232785Speter  filevec[1].prefix_end = p1;
49332785Speter
49432785Speter  /* Find identical suffix.  */
49532785Speter
49632785Speter  /* P0 and P1 point beyond the last chars not yet compared.  */
49732785Speter  p0 = buffer0 + n0;
49832785Speter  p1 = buffer1 + n1;
49932785Speter
50032785Speter  if (! ROBUST_OUTPUT_STYLE (output_style)
50132785Speter      || filevec[0].missing_newline == filevec[1].missing_newline)
50232785Speter    {
50332785Speter      end0 = p0;	/* Addr of last char in file 0.  */
50432785Speter
50532785Speter      /* Get value of P0 at which we should stop scanning backward:
50632785Speter	 this is when either P0 or P1 points just past the last char
50732785Speter	 of the identical prefix.  */
50832785Speter      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
50932785Speter
51032785Speter      /* Scan back until chars don't match or we reach that point.  */
511128266Speter      for (; p0 != beg0; p0--, p1--)
512128266Speter	if (*p0 != *p1)
51332785Speter	  {
51432785Speter	    /* Point at the first char of the matching suffix.  */
51532785Speter	    beg0 = p0;
51632785Speter	    break;
51732785Speter	  }
51832785Speter
51932785Speter      /* Are we at a line-beginning in both files?  If not, add the rest of
52032785Speter	 this line to the main body.  Discard up to HORIZON_LINES lines from
52132785Speter	 the identical suffix.  Also, discard one extra line,
52232785Speter	 because shift_boundaries may need it.  */
52332785Speter      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
52432785Speter			    &&
52532785Speter			    (buffer1 == p1 || p1[-1] == '\n'));
52632785Speter      while (i-- && p0 != end0)
52732785Speter	while (*p0++ != '\n')
52832785Speter	  ;
52932785Speter
53032785Speter      p1 += p0 - beg0;
53132785Speter    }
53232785Speter
53332785Speter  /* Record the suffix.  */
53432785Speter  filevec[0].suffix_begin = p0;
53532785Speter  filevec[1].suffix_begin = p1;
53632785Speter
53732785Speter  /* Calculate number of lines of prefix to save.
53832785Speter
53932785Speter     prefix_count == 0 means save the whole prefix;
54032785Speter     we need this with for options like -D that output the whole file.
54132785Speter     We also need it for options like -F that output some preceding line;
54232785Speter     at least we will need to find the last few lines,
54332785Speter     but since we don't know how many, it's easiest to find them all.
54432785Speter
54532785Speter     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
54632785Speter     of the line buffer; they'll be moved to the proper location later.
54732785Speter     Handle 1 more line than the context says (because we count 1 too many),
54832785Speter     rounded up to the next power of 2 to speed index computation.  */
54932785Speter
55032785Speter  if (no_diff_means_no_output && ! function_regexp_list)
55132785Speter    {
55232785Speter      for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
55332785Speter	;
55432785Speter      prefix_mask = prefix_count - 1;
55532785Speter      alloc_lines0
55632785Speter	= prefix_count
55732785Speter	  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
55832785Speter	  + context;
55932785Speter    }
56032785Speter  else
56132785Speter    {
56232785Speter      prefix_count = 0;
56332785Speter      prefix_mask = ~0;
56432785Speter      alloc_lines0 = GUESS_LINES (0, 0, n0);
56532785Speter    }
56632785Speter
56732785Speter  lines = 0;
56832785Speter  linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
56932785Speter
57032785Speter  /* If the prefix is needed, find the prefix lines.  */
57132785Speter  if (! (no_diff_means_no_output
57232785Speter	 && filevec[0].prefix_end == p0
57332785Speter	 && filevec[1].prefix_end == p1))
57432785Speter    {
57532785Speter      p0 = buffer0;
57632785Speter      end0 = filevec[0].prefix_end;
57732785Speter      while (p0 != end0)
57832785Speter	{
57932785Speter	  int l = lines++ & prefix_mask;
58032785Speter	  if (l == alloc_lines0)
58132785Speter	    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
58232785Speter							 * sizeof(*linbuf0));
58332785Speter	  linbuf0[l] = p0;
58432785Speter	  while (*p0++ != '\n')
58532785Speter	    ;
58632785Speter	}
58732785Speter    }
58832785Speter  buffered_prefix = prefix_count && context < lines ? context : lines;
58932785Speter
59032785Speter  /* Allocate line buffer 1.  */
59132785Speter  tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
59232785Speter
59332785Speter  alloc_lines1
59432785Speter    = (buffered_prefix
59532785Speter       + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
59632785Speter       + context);
59732785Speter  linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
59832785Speter
59932785Speter  if (buffered_prefix != lines)
60032785Speter    {
60132785Speter      /* Rotate prefix lines to proper location.  */
60232785Speter      for (i = 0;  i < buffered_prefix;  i++)
60332785Speter	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
60432785Speter      for (i = 0;  i < buffered_prefix;  i++)
60532785Speter	linbuf0[i] = linbuf1[i];
60632785Speter    }
60732785Speter
60832785Speter  /* Initialize line buffer 1 from line buffer 0.  */
60932785Speter  for (i = 0; i < buffered_prefix; i++)
61032785Speter    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
61132785Speter
61232785Speter  /* Record the line buffer, adjusted so that
61332785Speter     linbuf*[0] points at the first differing line.  */
61432785Speter  filevec[0].linbuf = linbuf0 + buffered_prefix;
61532785Speter  filevec[1].linbuf = linbuf1 + buffered_prefix;
61632785Speter  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
61732785Speter  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
61832785Speter  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
61932785Speter  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
62032785Speter}
62132785Speter
62232785Speter/* Largest primes less than some power of two, for nbuckets.  Values range
62332785Speter   from useful to preposterous.  If one of these numbers isn't prime
62432785Speter   after all, don't blame it on me, blame it on primes (6) . . . */
62532785Speterstatic int const primes[] =
62632785Speter{
62732785Speter  509,
62832785Speter  1021,
62932785Speter  2039,
63032785Speter  4093,
63132785Speter  8191,
63232785Speter  16381,
63332785Speter  32749,
63432785Speter#if 32767 < INT_MAX
63532785Speter  65521,
63632785Speter  131071,
63732785Speter  262139,
63832785Speter  524287,
63932785Speter  1048573,
64032785Speter  2097143,
64132785Speter  4194301,
64232785Speter  8388593,
64332785Speter  16777213,
64432785Speter  33554393,
64532785Speter  67108859,			/* Preposterously large . . . */
64632785Speter  134217689,
64732785Speter  268435399,
64832785Speter  536870909,
64932785Speter  1073741789,
65032785Speter  2147483647,
65132785Speter#endif
65232785Speter  0
65332785Speter};
65432785Speter
65532785Speter/* Given a vector of two file_data objects, read the file associated
65632785Speter   with each one, and build the table of equivalence classes.
65732785Speter   Return 1 if either file appears to be a binary file.
65832785Speter   If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
65932785Speter
66032785Speterint
66132785Speterread_files (filevec, pretend_binary)
66232785Speter     struct file_data filevec[];
66332785Speter     int pretend_binary;
66432785Speter{
66532785Speter  int i;
66632785Speter  int skip_test = always_text_flag | pretend_binary;
66732785Speter  int appears_binary = pretend_binary | sip (&filevec[0], skip_test);
66832785Speter
66932785Speter  if (filevec[0].desc != filevec[1].desc)
67032785Speter    appears_binary |= sip (&filevec[1], skip_test | appears_binary);
67132785Speter  else
67232785Speter    {
67332785Speter      filevec[1].buffer = filevec[0].buffer;
67432785Speter      filevec[1].bufsize = filevec[0].bufsize;
67532785Speter      filevec[1].buffered_chars = filevec[0].buffered_chars;
67632785Speter    }
67732785Speter  if (appears_binary)
67832785Speter    {
67932785Speter#if HAVE_SETMODE
68032785Speter      setmode (filevec[0].desc, O_BINARY);
68132785Speter      setmode (filevec[1].desc, O_BINARY);
68232785Speter#endif
68332785Speter      return 1;
68432785Speter    }
68532785Speter
68632785Speter  find_identical_ends (filevec);
68732785Speter
68832785Speter  equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
68932785Speter  equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
69032785Speter  /* Equivalence class 0 is permanently safe for lines that were not
69132785Speter     hashed.  Real equivalence classes start at 1. */
69232785Speter  equivs_index = 1;
69332785Speter
69432785Speter  for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
69532785Speter    if (! primes[i])
69632785Speter      abort ();
69732785Speter  nbuckets = primes[i];
69832785Speter
69932785Speter  buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets));
70032785Speter  bzero (buckets++, (nbuckets + 1) * sizeof (*buckets));
70132785Speter
70232785Speter  for (i = 0; i < 2; i++)
70332785Speter    find_and_hash_each_line (&filevec[i]);
70432785Speter
70532785Speter  filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
70632785Speter
70732785Speter  free (equivs);
70832785Speter  free (buckets - 1);
70932785Speter
71032785Speter  return 0;
71132785Speter}
712