132785Speter/* File I/O for GNU DIFF. 232785Speter Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc. 332785Speter 432785SpeterThis file is part of GNU DIFF. 532785Speter 632785SpeterGNU DIFF is free software; you can redistribute it and/or modify 732785Speterit under the terms of the GNU General Public License as published by 832785Speterthe Free Software Foundation; either version 2, or (at your option) 932785Speterany later version. 1032785Speter 1132785SpeterGNU DIFF is distributed in the hope that it will be useful, 1232785Speterbut WITHOUT ANY WARRANTY; without even the implied warranty of 1332785SpeterMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1432785SpeterGNU General Public License for more details. 1532785Speter 1654427Speter*/ 1732785Speter 1832785Speter#include "diff.h" 1932785Speter 2032785Speter/* Rotate a value n bits to the left. */ 2132785Speter#define UINT_BIT (sizeof (unsigned) * CHAR_BIT) 2232785Speter#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n))) 2332785Speter 2432785Speter/* Given a hash value and a new character, return a new hash value. */ 2532785Speter#define HASH(h, c) ((c) + ROL (h, 7)) 2632785Speter 2732785Speter/* Guess remaining number of lines from number N of lines so far, 2832785Speter size S so far, and total size T. */ 2932785Speter#define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5) 3032785Speter 3132785Speter/* Type used for fast prefix comparison in find_identical_ends. */ 3232785Speter#ifndef word 3332785Speter#define word int 3432785Speter#endif 3532785Speter 3632785Speter/* Lines are put into equivalence classes (of lines that match in line_cmp). 3732785Speter Each equivalence class is represented by one of these structures, 3832785Speter but only while the classes are being computed. 3932785Speter Afterward, each class is represented by a number. */ 4032785Speterstruct equivclass 4132785Speter{ 4232785Speter int next; /* Next item in this bucket. */ 4332785Speter unsigned hash; /* Hash of lines in this class. */ 4432785Speter char const *line; /* A line that fits this class. */ 4532785Speter size_t length; /* That line's length, not counting its newline. */ 4632785Speter}; 4732785Speter 4832785Speter/* Hash-table: array of buckets, each being a chain of equivalence classes. 4932785Speter buckets[-1] is reserved for incomplete lines. */ 5032785Speterstatic int *buckets; 5132785Speter 5232785Speter/* Number of buckets in the hash table array, not counting buckets[-1]. */ 5332785Speterstatic int nbuckets; 5432785Speter 5532785Speter/* Array in which the equivalence classes are allocated. 5632785Speter The bucket-chains go through the elements in this array. 5732785Speter The number of an equivalence class is its index in this array. */ 5832785Speterstatic struct equivclass *equivs; 5932785Speter 6032785Speter/* Index of first free element in the array `equivs'. */ 6132785Speterstatic int equivs_index; 6232785Speter 6332785Speter/* Number of elements allocated in the array `equivs'. */ 6432785Speterstatic int equivs_alloc; 6532785Speter 6632785Speterstatic void find_and_hash_each_line PARAMS((struct file_data *)); 6732785Speterstatic void find_identical_ends PARAMS((struct file_data[])); 6832785Speterstatic void prepare_text_end PARAMS((struct file_data *)); 6932785Speter 7032785Speter/* Check for binary files and compare them for exact identity. */ 7132785Speter 7232785Speter/* Return 1 if BUF contains a non text character. 7332785Speter SIZE is the number of characters in BUF. */ 7432785Speter 7532785Speter#define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0) 7632785Speter 7732785Speter/* Get ready to read the current file. 7832785Speter Return nonzero if SKIP_TEST is zero, 7932785Speter and if it appears to be a binary file. */ 8032785Speter 8132785Speterint 8232785Spetersip (current, skip_test) 8332785Speter struct file_data *current; 8432785Speter int skip_test; 8532785Speter{ 8632785Speter /* If we have a nonexistent file at this stage, treat it as empty. */ 8732785Speter if (current->desc < 0) 8832785Speter { 8932785Speter /* Leave room for a sentinel. */ 9032785Speter current->bufsize = sizeof (word); 9132785Speter current->buffer = xmalloc (current->bufsize); 9232785Speter } 9332785Speter else 9432785Speter { 9532785Speter current->bufsize = STAT_BLOCKSIZE (current->stat); 9632785Speter current->buffer = xmalloc (current->bufsize); 9732785Speter 9832785Speter if (! skip_test) 9932785Speter { 10032785Speter /* Check first part of file to see if it's a binary file. */ 10132785Speter#if HAVE_SETMODE 10232785Speter int oldmode = setmode (current->desc, O_BINARY); 10332785Speter#endif 104102840Speter ssize_t n = read (current->desc, current->buffer, current->bufsize); 10532785Speter if (n == -1) 10632785Speter pfatal_with_name (current->name); 10732785Speter current->buffered_chars = n; 10832785Speter#if HAVE_SETMODE 10932785Speter if (oldmode != O_BINARY) 11032785Speter { 11132785Speter if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1) 11232785Speter pfatal_with_name (current->name); 11332785Speter setmode (current->desc, oldmode); 11432785Speter current->buffered_chars = 0; 11532785Speter } 11632785Speter#endif 11732785Speter return binary_file_p (current->buffer, n); 11832785Speter } 11932785Speter } 12032785Speter 12132785Speter current->buffered_chars = 0; 12232785Speter return 0; 12332785Speter} 12432785Speter 12532785Speter/* Slurp the rest of the current file completely into memory. */ 12632785Speter 12732785Spetervoid 12832785Speterslurp (current) 12932785Speter struct file_data *current; 13032785Speter{ 131102840Speter ssize_t cc; 13232785Speter 13332785Speter if (current->desc < 0) 13432785Speter /* The file is nonexistent. */ 13532785Speter ; 13632785Speter else if (S_ISREG (current->stat.st_mode)) 13732785Speter { 13832785Speter /* It's a regular file; slurp in the rest all at once. */ 13932785Speter 14032785Speter /* Get the size out of the stat block. 14132785Speter Allocate enough room for appended newline and sentinel. */ 14232785Speter cc = current->stat.st_size + 1 + sizeof (word); 14332785Speter if (current->bufsize < cc) 14432785Speter { 14532785Speter current->bufsize = cc; 14632785Speter current->buffer = xrealloc (current->buffer, cc); 14732785Speter } 14832785Speter 14932785Speter if (current->buffered_chars < current->stat.st_size) 15032785Speter { 15132785Speter cc = read (current->desc, 15232785Speter current->buffer + current->buffered_chars, 15332785Speter current->stat.st_size - current->buffered_chars); 15432785Speter if (cc == -1) 15532785Speter pfatal_with_name (current->name); 15632785Speter current->buffered_chars += cc; 15732785Speter } 15832785Speter } 15932785Speter /* It's not a regular file; read it, growing the buffer as needed. */ 16032785Speter else if (always_text_flag || current->buffered_chars != 0) 16132785Speter { 16232785Speter for (;;) 16332785Speter { 16432785Speter if (current->buffered_chars == current->bufsize) 16532785Speter { 16632785Speter current->bufsize = current->bufsize * 2; 16732785Speter current->buffer = xrealloc (current->buffer, current->bufsize); 16832785Speter } 16932785Speter cc = read (current->desc, 17032785Speter current->buffer + current->buffered_chars, 17132785Speter current->bufsize - current->buffered_chars); 17232785Speter if (cc == 0) 17332785Speter break; 17432785Speter if (cc == -1) 17532785Speter pfatal_with_name (current->name); 17632785Speter current->buffered_chars += cc; 17732785Speter } 17832785Speter /* Allocate just enough room for appended newline and sentinel. */ 17932785Speter current->bufsize = current->buffered_chars + 1 + sizeof (word); 18032785Speter current->buffer = xrealloc (current->buffer, current->bufsize); 18132785Speter } 18232785Speter} 18332785Speter 18432785Speter/* Split the file into lines, simultaneously computing the equivalence class for 18532785Speter each line. */ 18632785Speter 18732785Speterstatic void 18832785Speterfind_and_hash_each_line (current) 18932785Speter struct file_data *current; 19032785Speter{ 19132785Speter unsigned h; 19232785Speter unsigned char const *p = (unsigned char const *) current->prefix_end; 19332785Speter unsigned char c; 19432785Speter int i, *bucket; 19532785Speter size_t length; 19632785Speter 19732785Speter /* Cache often-used quantities in local variables to help the compiler. */ 19832785Speter char const **linbuf = current->linbuf; 19932785Speter int alloc_lines = current->alloc_lines; 20032785Speter int line = 0; 20132785Speter int linbuf_base = current->linbuf_base; 20232785Speter int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int)); 20332785Speter struct equivclass *eqs = equivs; 20432785Speter int eqs_index = equivs_index; 20532785Speter int eqs_alloc = equivs_alloc; 20632785Speter char const *suffix_begin = current->suffix_begin; 20732785Speter char const *bufend = current->buffer + current->buffered_chars; 20832785Speter int use_line_cmp = ignore_some_line_changes; 20932785Speter 21032785Speter while ((char const *) p < suffix_begin) 21132785Speter { 21232785Speter char const *ip = (char const *) p; 21332785Speter 21432785Speter /* Compute the equivalence class for this line. */ 21532785Speter 21632785Speter h = 0; 21732785Speter 21832785Speter /* Hash this line until we find a newline. */ 21932785Speter if (ignore_case_flag) 22032785Speter { 22132785Speter if (ignore_all_space_flag) 22232785Speter while ((c = *p++) != '\n') 22332785Speter { 22432785Speter if (! ISSPACE (c)) 22532785Speter h = HASH (h, ISUPPER (c) ? tolower (c) : c); 22632785Speter } 22732785Speter else if (ignore_space_change_flag) 22832785Speter while ((c = *p++) != '\n') 22932785Speter { 23032785Speter if (ISSPACE (c)) 23132785Speter { 23232785Speter for (;;) 23332785Speter { 23432785Speter c = *p++; 23532785Speter if (!ISSPACE (c)) 23632785Speter break; 23732785Speter if (c == '\n') 23832785Speter goto hashing_done; 23932785Speter } 24032785Speter h = HASH (h, ' '); 24132785Speter } 24232785Speter /* C is now the first non-space. */ 24332785Speter h = HASH (h, ISUPPER (c) ? tolower (c) : c); 24432785Speter } 24532785Speter else 24632785Speter while ((c = *p++) != '\n') 24732785Speter h = HASH (h, ISUPPER (c) ? tolower (c) : c); 24832785Speter } 24932785Speter else 25032785Speter { 25132785Speter if (ignore_all_space_flag) 25232785Speter while ((c = *p++) != '\n') 25332785Speter { 25432785Speter if (! ISSPACE (c)) 25532785Speter h = HASH (h, c); 25632785Speter } 25732785Speter else if (ignore_space_change_flag) 25832785Speter while ((c = *p++) != '\n') 25932785Speter { 26032785Speter if (ISSPACE (c)) 26132785Speter { 26232785Speter for (;;) 26332785Speter { 26432785Speter c = *p++; 26532785Speter if (!ISSPACE (c)) 26632785Speter break; 26732785Speter if (c == '\n') 26832785Speter goto hashing_done; 26932785Speter } 27032785Speter h = HASH (h, ' '); 27132785Speter } 27232785Speter /* C is now the first non-space. */ 27332785Speter h = HASH (h, c); 27432785Speter } 27532785Speter else 27632785Speter while ((c = *p++) != '\n') 27732785Speter h = HASH (h, c); 27832785Speter } 27932785Speter hashing_done:; 28032785Speter 28132785Speter bucket = &buckets[h % nbuckets]; 28232785Speter length = (char const *) p - ip - 1; 28332785Speter 28432785Speter if ((char const *) p == bufend 28532785Speter && current->missing_newline 28632785Speter && ROBUST_OUTPUT_STYLE (output_style)) 28732785Speter { 28832785Speter /* This line is incomplete. If this is significant, 28932785Speter put the line into bucket[-1]. */ 29032785Speter if (! (ignore_space_change_flag | ignore_all_space_flag)) 29132785Speter bucket = &buckets[-1]; 29232785Speter 29332785Speter /* Omit the inserted newline when computing linbuf later. */ 29432785Speter p--; 29532785Speter bufend = suffix_begin = (char const *) p; 29632785Speter } 29732785Speter 29832785Speter for (i = *bucket; ; i = eqs[i].next) 29932785Speter if (!i) 30032785Speter { 30132785Speter /* Create a new equivalence class in this bucket. */ 30232785Speter i = eqs_index++; 30332785Speter if (i == eqs_alloc) 30432785Speter eqs = (struct equivclass *) 30532785Speter xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs)); 30632785Speter eqs[i].next = *bucket; 30732785Speter eqs[i].hash = h; 30832785Speter eqs[i].line = ip; 30932785Speter eqs[i].length = length; 31032785Speter *bucket = i; 31132785Speter break; 31232785Speter } 31332785Speter else if (eqs[i].hash == h) 31432785Speter { 31532785Speter char const *eqline = eqs[i].line; 31632785Speter 31732785Speter /* Reuse existing equivalence class if the lines are identical. 31832785Speter This detects the common case of exact identity 31932785Speter faster than complete comparison would. */ 32032785Speter if (eqs[i].length == length && memcmp (eqline, ip, length) == 0) 32132785Speter break; 32232785Speter 32332785Speter /* Reuse existing class if line_cmp reports the lines equal. */ 32432785Speter if (use_line_cmp && line_cmp (eqline, ip) == 0) 32532785Speter break; 32632785Speter } 32732785Speter 32832785Speter /* Maybe increase the size of the line table. */ 32932785Speter if (line == alloc_lines) 33032785Speter { 33132785Speter /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ 33232785Speter alloc_lines = 2 * alloc_lines - linbuf_base; 33332785Speter cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs)); 33432785Speter linbuf = (char const **) xrealloc (linbuf + linbuf_base, 33532785Speter (alloc_lines - linbuf_base) 33632785Speter * sizeof (*linbuf)) 33732785Speter - linbuf_base; 33832785Speter } 33932785Speter linbuf[line] = ip; 34032785Speter cureqs[line] = i; 34132785Speter ++line; 34232785Speter } 34332785Speter 34432785Speter current->buffered_lines = line; 34532785Speter 34632785Speter for (i = 0; ; i++) 34732785Speter { 34832785Speter /* Record the line start for lines in the suffix that we care about. 34932785Speter Record one more line start than lines, 35032785Speter so that we can compute the length of any buffered line. */ 35132785Speter if (line == alloc_lines) 35232785Speter { 35332785Speter /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */ 35432785Speter alloc_lines = 2 * alloc_lines - linbuf_base; 35532785Speter linbuf = (char const **) xrealloc (linbuf + linbuf_base, 35632785Speter (alloc_lines - linbuf_base) 35732785Speter * sizeof (*linbuf)) 35832785Speter - linbuf_base; 35932785Speter } 36032785Speter linbuf[line] = (char const *) p; 36132785Speter 36232785Speter if ((char const *) p == bufend) 36332785Speter break; 36432785Speter 36532785Speter if (context <= i && no_diff_means_no_output) 36632785Speter break; 36732785Speter 36832785Speter line++; 36932785Speter 37032785Speter while (*p++ != '\n') 37132785Speter ; 37232785Speter } 37332785Speter 37432785Speter /* Done with cache in local variables. */ 37532785Speter current->linbuf = linbuf; 37632785Speter current->valid_lines = line; 37732785Speter current->alloc_lines = alloc_lines; 37832785Speter current->equivs = cureqs; 37932785Speter equivs = eqs; 38032785Speter equivs_alloc = eqs_alloc; 38132785Speter equivs_index = eqs_index; 38232785Speter} 38332785Speter 38432785Speter/* Prepare the end of the text. Make sure it's initialized. 38532785Speter Make sure text ends in a newline, 38632785Speter but remember that we had to add one. */ 38732785Speter 38832785Speterstatic void 38932785Speterprepare_text_end (current) 39032785Speter struct file_data *current; 39132785Speter{ 39232785Speter size_t buffered_chars = current->buffered_chars; 39332785Speter char *p = current->buffer; 39432785Speter 39532785Speter if (buffered_chars == 0 || p[buffered_chars - 1] == '\n') 39632785Speter current->missing_newline = 0; 39732785Speter else 39832785Speter { 39932785Speter p[buffered_chars++] = '\n'; 40032785Speter current->buffered_chars = buffered_chars; 40132785Speter current->missing_newline = 1; 40232785Speter } 40332785Speter 40432785Speter /* Don't use uninitialized storage when planting or using sentinels. */ 40532785Speter if (p) 40632785Speter bzero (p + buffered_chars, sizeof (word)); 40732785Speter} 40832785Speter 40932785Speter/* Given a vector of two file_data objects, find the identical 41032785Speter prefixes and suffixes of each object. */ 41132785Speter 41232785Speterstatic void 41332785Speterfind_identical_ends (filevec) 41432785Speter struct file_data filevec[]; 41532785Speter{ 41632785Speter word *w0, *w1; 41732785Speter char *p0, *p1, *buffer0, *buffer1; 41832785Speter char const *end0, *beg0; 41932785Speter char const **linbuf0, **linbuf1; 42032785Speter int i, lines; 42132785Speter size_t n0, n1, tem; 42232785Speter int alloc_lines0, alloc_lines1; 42332785Speter int buffered_prefix, prefix_count, prefix_mask; 42432785Speter 42532785Speter slurp (&filevec[0]); 42632785Speter if (filevec[0].desc != filevec[1].desc) 42732785Speter slurp (&filevec[1]); 42832785Speter else 42932785Speter { 43032785Speter filevec[1].buffer = filevec[0].buffer; 43132785Speter filevec[1].bufsize = filevec[0].bufsize; 43232785Speter filevec[1].buffered_chars = filevec[0].buffered_chars; 43332785Speter } 43432785Speter for (i = 0; i < 2; i++) 43532785Speter prepare_text_end (&filevec[i]); 43632785Speter 43732785Speter /* Find identical prefix. */ 43832785Speter 43932785Speter p0 = buffer0 = filevec[0].buffer; 44032785Speter p1 = buffer1 = filevec[1].buffer; 44132785Speter 44232785Speter n0 = filevec[0].buffered_chars; 44332785Speter n1 = filevec[1].buffered_chars; 44432785Speter 44532785Speter if (p0 == p1) 44632785Speter /* The buffers are the same; sentinels won't work. */ 44732785Speter p0 = p1 += n1; 44832785Speter else 44932785Speter { 45032785Speter /* Insert end sentinels, in this case characters that are guaranteed 45132785Speter to make the equality test false, and thus terminate the loop. */ 45232785Speter 45332785Speter if (n0 < n1) 45432785Speter p0[n0] = ~p1[n0]; 45532785Speter else 45632785Speter p1[n1] = ~p0[n1]; 45732785Speter 45832785Speter /* Loop until first mismatch, or to the sentinel characters. */ 45932785Speter 46032785Speter /* Compare a word at a time for speed. */ 46132785Speter w0 = (word *) p0; 46232785Speter w1 = (word *) p1; 46332785Speter while (*w0++ == *w1++) 46432785Speter ; 46532785Speter --w0, --w1; 46632785Speter 46732785Speter /* Do the last few bytes of comparison a byte at a time. */ 46832785Speter p0 = (char *) w0; 46932785Speter p1 = (char *) w1; 47032785Speter while (*p0++ == *p1++) 47132785Speter ; 47232785Speter --p0, --p1; 47332785Speter 47432785Speter /* Don't mistakenly count missing newline as part of prefix. */ 47532785Speter if (ROBUST_OUTPUT_STYLE (output_style) 47632785Speter && (buffer0 + n0 - filevec[0].missing_newline < p0) 47732785Speter != 47832785Speter (buffer1 + n1 - filevec[1].missing_newline < p1)) 47932785Speter --p0, --p1; 48032785Speter } 48132785Speter 48232785Speter /* Now P0 and P1 point at the first nonmatching characters. */ 48332785Speter 48432785Speter /* Skip back to last line-beginning in the prefix, 48532785Speter and then discard up to HORIZON_LINES lines from the prefix. */ 48632785Speter i = horizon_lines; 48732785Speter while (p0 != buffer0 && (p0[-1] != '\n' || i--)) 48832785Speter --p0, --p1; 48932785Speter 49032785Speter /* Record the prefix. */ 49132785Speter filevec[0].prefix_end = p0; 49232785Speter filevec[1].prefix_end = p1; 49332785Speter 49432785Speter /* Find identical suffix. */ 49532785Speter 49632785Speter /* P0 and P1 point beyond the last chars not yet compared. */ 49732785Speter p0 = buffer0 + n0; 49832785Speter p1 = buffer1 + n1; 49932785Speter 50032785Speter if (! ROBUST_OUTPUT_STYLE (output_style) 50132785Speter || filevec[0].missing_newline == filevec[1].missing_newline) 50232785Speter { 50332785Speter end0 = p0; /* Addr of last char in file 0. */ 50432785Speter 50532785Speter /* Get value of P0 at which we should stop scanning backward: 50632785Speter this is when either P0 or P1 points just past the last char 50732785Speter of the identical prefix. */ 50832785Speter beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1); 50932785Speter 51032785Speter /* Scan back until chars don't match or we reach that point. */ 511128266Speter for (; p0 != beg0; p0--, p1--) 512128266Speter if (*p0 != *p1) 51332785Speter { 51432785Speter /* Point at the first char of the matching suffix. */ 51532785Speter beg0 = p0; 51632785Speter break; 51732785Speter } 51832785Speter 51932785Speter /* Are we at a line-beginning in both files? If not, add the rest of 52032785Speter this line to the main body. Discard up to HORIZON_LINES lines from 52132785Speter the identical suffix. Also, discard one extra line, 52232785Speter because shift_boundaries may need it. */ 52332785Speter i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n') 52432785Speter && 52532785Speter (buffer1 == p1 || p1[-1] == '\n')); 52632785Speter while (i-- && p0 != end0) 52732785Speter while (*p0++ != '\n') 52832785Speter ; 52932785Speter 53032785Speter p1 += p0 - beg0; 53132785Speter } 53232785Speter 53332785Speter /* Record the suffix. */ 53432785Speter filevec[0].suffix_begin = p0; 53532785Speter filevec[1].suffix_begin = p1; 53632785Speter 53732785Speter /* Calculate number of lines of prefix to save. 53832785Speter 53932785Speter prefix_count == 0 means save the whole prefix; 54032785Speter we need this with for options like -D that output the whole file. 54132785Speter We also need it for options like -F that output some preceding line; 54232785Speter at least we will need to find the last few lines, 54332785Speter but since we don't know how many, it's easiest to find them all. 54432785Speter 54532785Speter Otherwise, prefix_count != 0. Save just prefix_count lines at start 54632785Speter of the line buffer; they'll be moved to the proper location later. 54732785Speter Handle 1 more line than the context says (because we count 1 too many), 54832785Speter rounded up to the next power of 2 to speed index computation. */ 54932785Speter 55032785Speter if (no_diff_means_no_output && ! function_regexp_list) 55132785Speter { 55232785Speter for (prefix_count = 1; prefix_count < context + 1; prefix_count *= 2) 55332785Speter ; 55432785Speter prefix_mask = prefix_count - 1; 55532785Speter alloc_lines0 55632785Speter = prefix_count 55732785Speter + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end) 55832785Speter + context; 55932785Speter } 56032785Speter else 56132785Speter { 56232785Speter prefix_count = 0; 56332785Speter prefix_mask = ~0; 56432785Speter alloc_lines0 = GUESS_LINES (0, 0, n0); 56532785Speter } 56632785Speter 56732785Speter lines = 0; 56832785Speter linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0)); 56932785Speter 57032785Speter /* If the prefix is needed, find the prefix lines. */ 57132785Speter if (! (no_diff_means_no_output 57232785Speter && filevec[0].prefix_end == p0 57332785Speter && filevec[1].prefix_end == p1)) 57432785Speter { 57532785Speter p0 = buffer0; 57632785Speter end0 = filevec[0].prefix_end; 57732785Speter while (p0 != end0) 57832785Speter { 57932785Speter int l = lines++ & prefix_mask; 58032785Speter if (l == alloc_lines0) 58132785Speter linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2) 58232785Speter * sizeof(*linbuf0)); 58332785Speter linbuf0[l] = p0; 58432785Speter while (*p0++ != '\n') 58532785Speter ; 58632785Speter } 58732785Speter } 58832785Speter buffered_prefix = prefix_count && context < lines ? context : lines; 58932785Speter 59032785Speter /* Allocate line buffer 1. */ 59132785Speter tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1; 59232785Speter 59332785Speter alloc_lines1 59432785Speter = (buffered_prefix 59532785Speter + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem) 59632785Speter + context); 59732785Speter linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1)); 59832785Speter 59932785Speter if (buffered_prefix != lines) 60032785Speter { 60132785Speter /* Rotate prefix lines to proper location. */ 60232785Speter for (i = 0; i < buffered_prefix; i++) 60332785Speter linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask]; 60432785Speter for (i = 0; i < buffered_prefix; i++) 60532785Speter linbuf0[i] = linbuf1[i]; 60632785Speter } 60732785Speter 60832785Speter /* Initialize line buffer 1 from line buffer 0. */ 60932785Speter for (i = 0; i < buffered_prefix; i++) 61032785Speter linbuf1[i] = linbuf0[i] - buffer0 + buffer1; 61132785Speter 61232785Speter /* Record the line buffer, adjusted so that 61332785Speter linbuf*[0] points at the first differing line. */ 61432785Speter filevec[0].linbuf = linbuf0 + buffered_prefix; 61532785Speter filevec[1].linbuf = linbuf1 + buffered_prefix; 61632785Speter filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix; 61732785Speter filevec[0].alloc_lines = alloc_lines0 - buffered_prefix; 61832785Speter filevec[1].alloc_lines = alloc_lines1 - buffered_prefix; 61932785Speter filevec[0].prefix_lines = filevec[1].prefix_lines = lines; 62032785Speter} 62132785Speter 62232785Speter/* Largest primes less than some power of two, for nbuckets. Values range 62332785Speter from useful to preposterous. If one of these numbers isn't prime 62432785Speter after all, don't blame it on me, blame it on primes (6) . . . */ 62532785Speterstatic int const primes[] = 62632785Speter{ 62732785Speter 509, 62832785Speter 1021, 62932785Speter 2039, 63032785Speter 4093, 63132785Speter 8191, 63232785Speter 16381, 63332785Speter 32749, 63432785Speter#if 32767 < INT_MAX 63532785Speter 65521, 63632785Speter 131071, 63732785Speter 262139, 63832785Speter 524287, 63932785Speter 1048573, 64032785Speter 2097143, 64132785Speter 4194301, 64232785Speter 8388593, 64332785Speter 16777213, 64432785Speter 33554393, 64532785Speter 67108859, /* Preposterously large . . . */ 64632785Speter 134217689, 64732785Speter 268435399, 64832785Speter 536870909, 64932785Speter 1073741789, 65032785Speter 2147483647, 65132785Speter#endif 65232785Speter 0 65332785Speter}; 65432785Speter 65532785Speter/* Given a vector of two file_data objects, read the file associated 65632785Speter with each one, and build the table of equivalence classes. 65732785Speter Return 1 if either file appears to be a binary file. 65832785Speter If PRETEND_BINARY is nonzero, pretend they are binary regardless. */ 65932785Speter 66032785Speterint 66132785Speterread_files (filevec, pretend_binary) 66232785Speter struct file_data filevec[]; 66332785Speter int pretend_binary; 66432785Speter{ 66532785Speter int i; 66632785Speter int skip_test = always_text_flag | pretend_binary; 66732785Speter int appears_binary = pretend_binary | sip (&filevec[0], skip_test); 66832785Speter 66932785Speter if (filevec[0].desc != filevec[1].desc) 67032785Speter appears_binary |= sip (&filevec[1], skip_test | appears_binary); 67132785Speter else 67232785Speter { 67332785Speter filevec[1].buffer = filevec[0].buffer; 67432785Speter filevec[1].bufsize = filevec[0].bufsize; 67532785Speter filevec[1].buffered_chars = filevec[0].buffered_chars; 67632785Speter } 67732785Speter if (appears_binary) 67832785Speter { 67932785Speter#if HAVE_SETMODE 68032785Speter setmode (filevec[0].desc, O_BINARY); 68132785Speter setmode (filevec[1].desc, O_BINARY); 68232785Speter#endif 68332785Speter return 1; 68432785Speter } 68532785Speter 68632785Speter find_identical_ends (filevec); 68732785Speter 68832785Speter equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1; 68932785Speter equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass)); 69032785Speter /* Equivalence class 0 is permanently safe for lines that were not 69132785Speter hashed. Real equivalence classes start at 1. */ 69232785Speter equivs_index = 1; 69332785Speter 69432785Speter for (i = 0; primes[i] < equivs_alloc / 3; i++) 69532785Speter if (! primes[i]) 69632785Speter abort (); 69732785Speter nbuckets = primes[i]; 69832785Speter 69932785Speter buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets)); 70032785Speter bzero (buckets++, (nbuckets + 1) * sizeof (*buckets)); 70132785Speter 70232785Speter for (i = 0; i < 2; i++) 70332785Speter find_and_hash_each_line (&filevec[i]); 70432785Speter 70532785Speter filevec[0].equiv_max = filevec[1].equiv_max = equivs_index; 70632785Speter 70732785Speter free (equivs); 70832785Speter free (buckets - 1); 70932785Speter 71032785Speter return 0; 71132785Speter} 712