1/* cmp - compare two files byte by byte
2
3   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4   2002 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.
18   If not, write to the Free Software Foundation,
19   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20
21#include "system.h"
22
23#include <stdio.h>
24#include <cmpbuf.h>
25#include <c-stack.h>
26#include <error.h>
27#include <exitfail.h>
28#include <freesoft.h>
29#include <getopt.h>
30#include <hard-locale.h>
31#include <inttostr.h>
32#include <setmode.h>
33#include <xalloc.h>
34#include <xstrtol.h>
35
/* hard_locale_LC_MESSAGES is the result of hard_locale (LC_MESSAGES)
   when LC_MESSAGES is defined and NLS is enabled, and the constant 0
   otherwise.  cmp () consults it when choosing between the "char" and
   "byte" wordings of the first-difference message.  */
#if defined LC_MESSAGES && ENABLE_NLS
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#else
# define hard_locale_LC_MESSAGES 0
#endif
41
/* Author credit printed by --version.  It is marked with N_ here and
   passed through _ where it is printed.  */
static char const authorship_msgid[] =
  N_("Written by Torbjorn Granlund and David MacKenzie.");

/* Copyright notice printed as-is by --version.  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";

/* Version string printed by --version; defined outside this file.  */
extern char const version_string[];

/* Forward declarations of functions defined later in this file.  */
static int cmp (void);
static off_t file_position (int);
static size_t block_compare (word const *, word const *);
static size_t block_compare_and_count (word const *, word const *, off_t *);
static void sprintc (char *, unsigned char);
55
/* Name under which this program was invoked (set from argv[0]).  */
char *program_name;

/* Names of the compared files; "-" stands for standard input.  */
static char const *file[2];

/* File descriptors of the files.  */
static int file_desc[2];

/* Status of the files, as filled in by fstat in main.  */
static struct stat stat_buf[2];

/* Read buffers for the files.  Both live in a single allocation made
   by main.  */
static word *buffer[2];

/* Optimal block size for the files; each read asks for at most this
   many bytes.  */
static size_t buf_size;

/* Number of initial bytes to skip in each file.  */
static off_t ignore_initial[2];

/* Maximum number of bytes to compare.  UINTMAX_MAX means no limit.  */
static uintmax_t bytes = UINTMAX_MAX;

/* Output format.  type_first_diff is zero, so it is the default when
   neither -l nor -s is given.  */
static enum comparison_type
  {
    type_first_diff,	/* Print the first difference.  */
    type_all_diffs,	/* Print all differences.  */
    type_status		/* Exit status only.  */
  } comparison_type;

/* If true, also print the values of differing bytes, quoted like
   cat -t does.  */
static bool opt_print_bytes;
90
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so that they cannot collide with any
   option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long.  Each entry maps a long
   name to the value that the option switch in main handles; the
   second field is nonzero for options that take an argument.  The
   all-zero entry terminates the table.  */
static struct option const long_options[] =
{
  {"print-bytes", 0, 0, 'b'},
  {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", 1, 0, 'i'},
  {"verbose", 0, 0, 'l'},
  {"bytes", 1, 0, 'n'},
  {"silent", 0, 0, 's'},
  {"quiet", 0, 0, 's'},
  {"version", 0, 0, 'v'},
  {"help", 0, 0, HELP_OPTION},
  {0, 0, 0, 0}
};
110
111static void try_help (char const *, char const *) __attribute__((noreturn));
112static void
113try_help (char const *reason_msgid, char const *operand)
114{
115  if (reason_msgid)
116    error (0, 0, _(reason_msgid), operand);
117  error (EXIT_TROUBLE, 0,
118	 _("Try `%s --help' for more information."), program_name);
119  abort ();
120}
121
/* Suffix characters handed to xstrtoumax when parsing the
   --ignore-initial and --bytes operands.  */
static char const valid_suffixes[] = "kKMGTPEZY0";
123
124/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
125   point after the operand.  If DELIMITER is nonzero, the operand may
126   be followed by DELIMITER; otherwise it must be null-terminated.  */
127static off_t
128parse_ignore_initial (char **argptr, char delimiter)
129{
130  uintmax_t val;
131  off_t o;
132  char const *arg = *argptr;
133  strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
134  if (! (e == LONGINT_OK
135	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
136      || (o = val) < 0 || o != val || val == UINTMAX_MAX)
137    try_help ("invalid --ignore-initial value `%s'", arg);
138  return o;
139}
140
141/* Specify the output format.  */
142static void
143specify_comparison_type (enum comparison_type t)
144{
145  if (comparison_type)
146    try_help ("options -l and -s are incompatible", 0);
147  comparison_type = t;
148}
149
150static void
151check_stdout (void)
152{
153  if (ferror (stdout))
154    error (EXIT_TROUBLE, 0, "%s", _("write failed"));
155  else if (fclose (stdout) != 0)
156    error (EXIT_TROUBLE, errno, "%s", _("standard output"));
157}
158
/* Help text for the options, one message per output line.  usage ()
   translates each entry and prints it indented by two spaces.  The
   list is terminated by a null pointer.  */
static char const * const option_help_msgid[] = {
  N_("-b  --print-bytes  Print differing bytes."),
  N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
  N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
  N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
  N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
  N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
  N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  0
};
171
172static void
173usage (void)
174{
175  char const * const *p;
176
177  printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
178	  program_name);
179  printf ("%s\n\n", _("Compare two files byte by byte."));
180  for (p = option_help_msgid;  *p;  p++)
181    printf ("  %s\n", _(*p));
182  printf ("\n%s\n%s\n\n%s\n\n%s\n",
183	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
184	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
185kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
186GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
187	  _("If a FILE is `-' or missing, read standard input."),
188	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
189}
190
191int
192main (int argc, char **argv)
193{
194  int c, f, exit_status;
195  size_t words_per_buffer;
196
197  exit_failure = EXIT_TROUBLE;
198  initialize_main (&argc, &argv);
199  program_name = argv[0];
200  setlocale (LC_ALL, "");
201  bindtextdomain (PACKAGE, LOCALEDIR);
202  textdomain (PACKAGE);
203  c_stack_action (c_stack_die);
204
205  /* Parse command line options.  */
206
207  while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
208	 != -1)
209    switch (c)
210      {
211      case 'b':
212      case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213	opt_print_bytes = 1;
214	break;
215
216      case 'i':
217	ignore_initial[0] = parse_ignore_initial (&optarg, ':');
218	ignore_initial[1] = (*optarg++ == ':'
219			     ? parse_ignore_initial (&optarg, 0)
220			     : ignore_initial[0]);
221	break;
222
223      case 'l':
224	specify_comparison_type (type_all_diffs);
225	break;
226
227      case 'n':
228	{
229	  uintmax_t n;
230	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
231	    try_help ("invalid --bytes value `%s'", optarg);
232	  if (n < bytes)
233	    bytes = n;
234	}
235	break;
236
237      case 's':
238	specify_comparison_type (type_status);
239	break;
240
241      case 'v':
242	printf ("cmp %s\n%s\n\n%s\n\n%s\n",
243		version_string, copyright_string,
244		_(free_software_msgid), _(authorship_msgid));
245	check_stdout ();
246	return EXIT_SUCCESS;
247
248      case HELP_OPTION:
249	usage ();
250	check_stdout ();
251	return EXIT_SUCCESS;
252
253      default:
254	try_help (0, 0);
255      }
256
257  if (optind == argc)
258    try_help ("missing operand after `%s'", argv[argc - 1]);
259
260  file[0] = argv[optind++];
261  file[1] = optind < argc ? argv[optind++] : "-";
262
263  for (f = 0; f < 2 && optind < argc; f++)
264    {
265      char *arg = argv[optind++];
266      ignore_initial[f] = parse_ignore_initial (&arg, 0);
267    }
268
269  if (optind < argc)
270    try_help ("extra operand `%s'", argv[optind]);
271
272  for (f = 0; f < 2; f++)
273    {
274      /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
275	 stdin is closed and opening file[0] yields file descriptor 0.  */
276      int f1 = f ^ (strcmp (file[1], "-") == 0);
277
278      /* Two files with the same name are identical.
279	 But wait until we open the file once, for proper diagnostics.  */
280      if (f && file_name_cmp (file[0], file[1]) == 0)
281	return EXIT_SUCCESS;
282
283      file_desc[f1] = (strcmp (file[f1], "-") == 0
284		       ? STDIN_FILENO
285		       : open (file[f1], O_RDONLY, 0));
286      if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
287	{
288	  if (file_desc[f1] < 0 && comparison_type == type_status)
289	    exit (EXIT_TROUBLE);
290	  else
291	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
292	}
293
294      set_binary_mode (file_desc[f1], 1);
295    }
296
297  /* If the files are links to the same inode and have the same file position,
298     they are identical.  */
299
300  if (0 < same_file (&stat_buf[0], &stat_buf[1])
301      && same_file_attributes (&stat_buf[0], &stat_buf[1])
302      && file_position (0) == file_position (1))
303    return EXIT_SUCCESS;
304
305  /* If output is redirected to the null device, we may assume `-s'.  */
306
307  if (comparison_type != type_status)
308    {
309      struct stat outstat, nullstat;
310
311      if (fstat (STDOUT_FILENO, &outstat) == 0
312	  && stat (NULL_DEVICE, &nullstat) == 0
313	  && 0 < same_file (&outstat, &nullstat))
314	comparison_type = type_status;
315    }
316
317  /* If only a return code is needed,
318     and if both input descriptors are associated with plain files,
319     conclude that the files differ if they have different sizes
320     and if more bytes will be compared than are in the smaller file.  */
321
322  if (comparison_type == type_status
323      && S_ISREG (stat_buf[0].st_mode)
324      && S_ISREG (stat_buf[1].st_mode))
325    {
326      off_t s0 = stat_buf[0].st_size - file_position (0);
327      off_t s1 = stat_buf[1].st_size - file_position (1);
328      if (s0 < 0)
329	s0 = 0;
330      if (s1 < 0)
331	s1 = 0;
332      if (s0 != s1 && MIN (s0, s1) < bytes)
333	exit (EXIT_FAILURE);
334    }
335
336  /* Get the optimal block size of the files.  */
337
338  buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
339			 STAT_BLOCKSIZE (stat_buf[1]),
340			 PTRDIFF_MAX - sizeof (word));
341
342  /* Allocate word-aligned buffers, with space for sentinels at the end.  */
343
344  words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
345  buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
346  buffer[1] = buffer[0] + words_per_buffer;
347
348  exit_status = cmp ();
349
350  for (f = 0; f < 2; f++)
351    if (close (file_desc[f]) != 0)
352      error (EXIT_TROUBLE, errno, "%s", file[f]);
353  if (exit_status != 0  &&  comparison_type != type_status)
354    check_stdout ();
355  exit (exit_status);
356  return exit_status;
357}
358
359/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
360   using `buffer[0]' and `buffer[1]'.
361   Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
362   >1 if error.  */
363
364static int
365cmp (void)
366{
367  off_t line_number = 1;	/* Line number (1...) of difference. */
368  off_t byte_number = 1;	/* Byte number (1...) of difference. */
369  uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
370  size_t read0, read1;		/* Number of bytes read from each file. */
371  size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
372  size_t smaller;		/* The lesser of `read0' and `read1'. */
373  word *buffer0 = buffer[0];
374  word *buffer1 = buffer[1];
375  char *buf0 = (char *) buffer0;
376  char *buf1 = (char *) buffer1;
377  int ret = EXIT_SUCCESS;
378  int f;
379  int offset_width;
380
381  if (comparison_type == type_all_diffs)
382    {
383      off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
384
385      for (f = 0; f < 2; f++)
386	if (S_ISREG (stat_buf[f].st_mode))
387	  {
388	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
389	    if (file_bytes < byte_number_max)
390	      byte_number_max = file_bytes;
391	  }
392
393      for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
394	continue;
395    }
396
397  for (f = 0; f < 2; f++)
398    {
399      off_t ig = ignore_initial[f];
400      if (ig && file_position (f) == -1)
401	{
402	  /* lseek failed; read and discard the ignored initial prefix.  */
403	  do
404	    {
405	      size_t bytes_to_read = MIN (ig, buf_size);
406	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
407	      if (r != bytes_to_read)
408		{
409		  if (r == SIZE_MAX)
410		    error (EXIT_TROUBLE, errno, "%s", file[f]);
411		  break;
412		}
413	      ig -= r;
414	    }
415	  while (ig);
416	}
417    }
418
419  do
420    {
421      size_t bytes_to_read = buf_size;
422
423      if (remaining != UINTMAX_MAX)
424	{
425	  if (remaining < bytes_to_read)
426	    bytes_to_read = remaining;
427	  remaining -= bytes_to_read;
428	}
429
430      read0 = block_read (file_desc[0], buf0, bytes_to_read);
431      if (read0 == SIZE_MAX)
432	error (EXIT_TROUBLE, errno, "%s", file[0]);
433      read1 = block_read (file_desc[1], buf1, bytes_to_read);
434      if (read1 == SIZE_MAX)
435	error (EXIT_TROUBLE, errno, "%s", file[1]);
436
437      /* Insert sentinels for the block compare.  */
438
439      buf0[read0] = ~buf1[read0];
440      buf1[read1] = ~buf0[read1];
441
442      /* If the line number should be written for differing files,
443	 compare the blocks and count the number of newlines
444	 simultaneously.  */
445      first_diff = (comparison_type == type_first_diff
446		    ? block_compare_and_count (buffer0, buffer1, &line_number)
447		    : block_compare (buffer0, buffer1));
448
449      byte_number += first_diff;
450      smaller = MIN (read0, read1);
451
452      if (first_diff < smaller)
453	{
454	  switch (comparison_type)
455	    {
456	    case type_first_diff:
457	      {
458		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
459		char line_buf[INT_BUFSIZE_BOUND (off_t)];
460		char const *byte_num = offtostr (byte_number, byte_buf);
461		char const *line_num = offtostr (line_number, line_buf);
462		if (!opt_print_bytes)
463		  {
464		    /* See POSIX 1003.1-2001 for this format.  This
465		       message is used only in the POSIX locale, so it
466		       need not be translated.  */
467		    static char const char_message[] =
468		      "%s %s differ: char %s, line %s\n";
469
470		    /* The POSIX rationale recommends using the word
471		       "byte" outside the POSIX locale.  Some gettext
472		       implementations translate even in the POSIX
473		       locale if certain other environment variables
474		       are set, so use "byte" if a translation is
475		       available, or if outside the POSIX locale.  */
476		    static char const byte_msgid[] =
477		      N_("%s %s differ: byte %s, line %s\n");
478		    char const *byte_message = _(byte_msgid);
479		    bool use_byte_message = (byte_message != byte_msgid
480					     || hard_locale_LC_MESSAGES);
481
482		    printf ((use_byte_message
483			     ? byte_message
484			     : "%s %s differ: char %s, line %s\n"),
485			    file[0], file[1], byte_num, line_num);
486		  }
487		else
488		  {
489		    unsigned char c0 = buf0[first_diff];
490		    unsigned char c1 = buf1[first_diff];
491		    char s0[5];
492		    char s1[5];
493		    sprintc (s0, c0);
494		    sprintc (s1, c1);
495		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
496			    file[0], file[1], byte_num, line_num,
497			    c0, s0, c1, s1);
498		}
499	      }
500	      /* Fall through.  */
501	    case type_status:
502	      return EXIT_FAILURE;
503
504	    case type_all_diffs:
505	      do
506		{
507		  unsigned char c0 = buf0[first_diff];
508		  unsigned char c1 = buf1[first_diff];
509		  if (c0 != c1)
510		    {
511		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
512		      char const *byte_num = offtostr (byte_number, byte_buf);
513		      if (!opt_print_bytes)
514			{
515			  /* See POSIX 1003.1-2001 for this format.  */
516			  printf ("%*s %3o %3o\n",
517				  offset_width, byte_num, c0, c1);
518			}
519		      else
520			{
521			  char s0[5];
522			  char s1[5];
523			  sprintc (s0, c0);
524			  sprintc (s1, c1);
525			  printf ("%*s %3o %-4s %3o %s\n",
526				  offset_width, byte_num, c0, s0, c1, s1);
527			}
528		    }
529		  byte_number++;
530		  first_diff++;
531		}
532	      while (first_diff < smaller);
533	      ret = EXIT_FAILURE;
534	      break;
535	    }
536	}
537
538      if (read0 != read1)
539	{
540	  if (comparison_type != type_status)
541	    {
542	      /* See POSIX 1003.1-2001 for this format.  */
543	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
544	    }
545
546	  return EXIT_FAILURE;
547	}
548    }
549  while (read0 == buf_size);
550
551  return ret;
552}
553
554/* Compare two blocks of memory P0 and P1 until they differ,
555   and count the number of '\n' occurrences in the common
556   part of P0 and P1.
557   If the blocks are not guaranteed to be different, put sentinels at the ends
558   of the blocks before calling this function.
559
560   Return the offset of the first byte that differs.
561   Increment *COUNT by the count of '\n' occurrences.  */
562
563static size_t
564block_compare_and_count (word const *p0, word const *p1, off_t *count)
565{
566  word l;		/* One word from first buffer. */
567  word const *l0, *l1;	/* Pointers into each buffer. */
568  char const *c0, *c1;	/* Pointers for finding exact address. */
569  size_t cnt = 0;	/* Number of '\n' occurrences. */
570  word nnnn;		/* Newline, sizeof (word) times.  */
571  int i;
572
573  nnnn = 0;
574  for (i = 0; i < sizeof nnnn; i++)
575    nnnn = (nnnn << CHAR_BIT) | '\n';
576
577  /* Find the rough position of the first difference by reading words,
578     not bytes.  */
579
580  for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
581    {
582      l ^= nnnn;
583      for (i = 0; i < sizeof l; i++)
584	{
585	  cnt += ! (unsigned char) l;
586	  l >>= CHAR_BIT;
587	}
588    }
589
590  /* Find the exact differing position (endianness independent).  */
591
592  for (c0 = (char const *) l0, c1 = (char const *) l1;
593       *c0 == *c1;
594       c0++, c1++)
595    cnt += *c0 == '\n';
596
597  *count += cnt;
598  return c0 - (char const *) p0;
599}
600
601/* Compare two blocks of memory P0 and P1 until they differ.
602   If the blocks are not guaranteed to be different, put sentinels at the ends
603   of the blocks before calling this function.
604
605   Return the offset of the first byte that differs.  */
606
607static size_t
608block_compare (word const *p0, word const *p1)
609{
610  word const *l0, *l1;
611  char const *c0, *c1;
612
613  /* Find the rough position of the first difference by reading words,
614     not bytes.  */
615
616  for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
617    continue;
618
619  /* Find the exact differing position (endianness independent).  */
620
621  for (c0 = (char const *) l0, c1 = (char const *) l1;
622       *c0 == *c1;
623       c0++, c1++)
624    continue;
625
626  return c0 - (char const *) p0;
627}
628
/* Store in BUF a null-terminated, visible representation of the byte
   C, quoted like cat -t does.

   A byte that ISPRINT accepts is stored unchanged.  Otherwise a byte
   of 128 or more gets the prefix "M-" and is reduced by 128; after
   that, a value below 32 is written as '^' followed by the value plus
   64, and 127 is written as "^?".  The longest result is four
   characters plus the terminating null, so BUF needs room for five
   bytes.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (! ISPRINT (c))
    {
      if (128 <= c)
	{
	  out[0] = 'M';
	  out[1] = '-';
	  out += 2;
	  c -= 128;
	}

      if (c == 127)
	{
	  *out++ = '^';
	  c = '?';
	}
      else if (c < 32)
	{
	  *out++ = '^';
	  c += 64;
	}
    }

  out[0] = c;
  out[1] = 0;
}
658
659/* Position file F to ignore_initial[F] bytes from its initial position,
660   and yield its new position.  Don't try more than once.  */
661
662static off_t
663file_position (int f)
664{
665  static bool positioned[2];
666  static off_t position[2];
667
668  if (! positioned[f])
669    {
670      positioned[f] = 1;
671      position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
672    }
673  return position[f];
674}
675