1/* cmp - compare two files byte by byte
2
3   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4   2002 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14   See the GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; see the file COPYING.
18   If not, write to the Free Software Foundation,
19   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20
21#include "system.h"
22
23#include <stdio.h>
24#include <cmpbuf.h>
25#include <c-stack.h>
26#include <error.h>
27#include <exitfail.h>
28#include <freesoft.h>
29#include <getopt.h>
30#include <hard-locale.h>
31#include <inttostr.h>
32#include <setmode.h>
33#include <xalloc.h>
34#include <xstrtol.h>
35
36#if defined LC_MESSAGES && ENABLE_NLS
37# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
38#else
39# define hard_locale_LC_MESSAGES 0
40#endif
41
42#ifdef __APPLE__
43#include "get_compat.h"
44#endif
45
/* Credits line printed by --version.  It is wrapped in N_ here and
   passed through _() at the point where it is printed.  */
static char const authorship_msgid[] =
  N_("Written by Torbjorn Granlund and David MacKenzie.");

/* Copyright line printed by --version (printed as is, without _()).  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";

/* Version text printed by --version; defined outside this file.  */
extern char const version_string[];

/* Forward declarations of the helpers that are defined after main.  */
static int cmp (void);
static off_t file_position (int);
static size_t block_compare (word const *, word const *);
static size_t block_compare_and_count (word const *, word const *, off_t *);
static void sprintc (char *, unsigned char);

/* Name under which this program was invoked.  */
char *program_name;

/* Filenames of the compared files.  */
static char const *file[2];

/* File descriptors of the files.  */
static int file_desc[2];

/* Status of the files.  */
static struct stat stat_buf[2];

/* Read buffers for the files.  */
static word *buffer[2];

/* Optimal block size for the files.  */
static size_t buf_size;

/* Initial prefix to ignore for each file.  */
static off_t ignore_initial[2];

/* Number of bytes to compare.  UINTMAX_MAX means that no limit was
   requested.  */
static uintmax_t bytes = UINTMAX_MAX;

/* Output format.  The zero value, type_first_diff, is the default and
   also serves as "no format option seen yet".  */
static enum comparison_type
  {
    type_first_diff,	/* Print the first difference.  */
    type_all_diffs,	/* Print all differences.  */
    type_status		/* Exit status only.  */
  } comparison_type;

/* If nonzero, print values of bytes quoted like cat -t does. */
static bool opt_print_bytes;

/* Values for long options that do not have single-letter equivalents.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long in main.  Each entry yields
   the matching short option character, or HELP_OPTION for --help.  */
static struct option const long_options[] =
{
  {"print-bytes", 0, 0, 'b'},
  {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", 1, 0, 'i'},
  {"verbose", 0, 0, 'l'},
  {"bytes", 1, 0, 'n'},
  {"silent", 0, 0, 's'},
  {"quiet", 0, 0, 's'},
  {"version", 0, 0, 'v'},
  {"help", 0, 0, HELP_OPTION},
  {0, 0, 0, 0}
};
114
115static void try_help (char const *, char const *) __attribute__((noreturn));
116static void
117try_help (char const *reason_msgid, char const *operand)
118{
119  if (reason_msgid)
120    error (0, 0, _(reason_msgid), operand);
121  error (EXIT_TROUBLE, 0,
122	 _("Try `%s --help' for more information."), program_name);
123  abort ();
124}
125
/* Suffix characters handed to xstrtoumax when parsing the operands of
   --ignore-initial and --bytes.  */
static char const valid_suffixes[] = "kKMGTPEZY0";
127
128/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
129   point after the operand.  If DELIMITER is nonzero, the operand may
130   be followed by DELIMITER; otherwise it must be null-terminated.  */
131static off_t
132parse_ignore_initial (char **argptr, char delimiter)
133{
134  uintmax_t val;
135  off_t o;
136  char const *arg = *argptr;
137  strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
138  if (! (e == LONGINT_OK
139	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
140      || (o = val) < 0 || o != val || val == UINTMAX_MAX)
141    try_help ("invalid --ignore-initial value `%s'", arg);
142  return o;
143}
144
145/* Specify the output format.  */
146static void
147specify_comparison_type (enum comparison_type t)
148{
149  if (comparison_type)
150    try_help ("options -l and -s are incompatible", 0);
151  comparison_type = t;
152}
153
154static void
155check_stdout (void)
156{
157  if (ferror (stdout))
158    error (EXIT_TROUBLE, 0, "%s", _("write failed"));
159  else if (fclose (stdout) != 0)
160    error (EXIT_TROUBLE, errno, "%s", _("standard output"));
161}
162
/* Option summary lines for --help.  usage prints them in order, each
   translated with _() and indented by two spaces.  The list ends with
   a null pointer.  */
static char const * const option_help_msgid[] = {
  N_("-b  --print-bytes  Print differing bytes."),
  N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
  N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
  N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
  N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
  N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
  N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  0
};
175
176static void
177usage (void)
178{
179  char const * const *p;
180
181  printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
182	  program_name);
183  printf ("%s\n\n", _("Compare two files byte by byte."));
184  for (p = option_help_msgid;  *p;  p++)
185    printf ("  %s\n", _(*p));
186  printf ("\n%s\n%s\n\n%s\n\n%s\n",
187	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
188	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
189kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
190GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
191	  _("If a FILE is `-' or missing, read standard input."),
192	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
193}
194
195int
196main (int argc, char **argv)
197{
198  int c, f, exit_status;
199  size_t words_per_buffer;
200
201  exit_failure = EXIT_TROUBLE;
202  initialize_main (&argc, &argv);
203  program_name = argv[0];
204  setlocale (LC_ALL, "");
205  bindtextdomain (PACKAGE, LOCALEDIR);
206  textdomain (PACKAGE);
207  c_stack_action (c_stack_die);
208
209  /* Parse command line options.  */
210
211  while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
212	 != -1)
213    switch (c)
214      {
215      case 'b':
216      case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
217	opt_print_bytes = 1;
218	break;
219
220      case 'i':
221	ignore_initial[0] = parse_ignore_initial (&optarg, ':');
222	ignore_initial[1] = (*optarg++ == ':'
223			     ? parse_ignore_initial (&optarg, 0)
224			     : ignore_initial[0]);
225	break;
226
227      case 'l':
228	specify_comparison_type (type_all_diffs);
229	break;
230
231      case 'n':
232	{
233	  uintmax_t n;
234	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
235	    try_help ("invalid --bytes value `%s'", optarg);
236	  if (n < bytes)
237	    bytes = n;
238	}
239	break;
240
241      case 's':
242	specify_comparison_type (type_status);
243	break;
244
245      case 'v':
246	printf ("cmp %s\n%s\n\n%s\n\n%s\n",
247		version_string, copyright_string,
248		_(free_software_msgid), _(authorship_msgid));
249	check_stdout ();
250	return EXIT_SUCCESS;
251
252      case HELP_OPTION:
253	usage ();
254	check_stdout ();
255	return EXIT_SUCCESS;
256
257      default:
258	try_help (0, 0);
259      }
260
261  if (optind == argc)
262    try_help ("missing operand after `%s'", argv[argc - 1]);
263
264  file[0] = argv[optind++];
265  file[1] = optind < argc ? argv[optind++] : "-";
266
267  for (f = 0; f < 2 && optind < argc; f++)
268    {
269      char *arg = argv[optind++];
270      ignore_initial[f] = parse_ignore_initial (&arg, 0);
271    }
272
273  if (optind < argc)
274    try_help ("extra operand `%s'", argv[optind]);
275
276  for (f = 0; f < 2; f++)
277    {
278      /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
279	 stdin is closed and opening file[0] yields file descriptor 0.  */
280      int f1 = f ^ (strcmp (file[1], "-") == 0);
281
282      /* Two files with the same name are identical.
283	 But wait until we open the file once, for proper diagnostics.  */
284      if (f && file_name_cmp (file[0], file[1]) == 0)
285	return EXIT_SUCCESS;
286
287      file_desc[f1] = (strcmp (file[f1], "-") == 0
288		       ? STDIN_FILENO
289		       : open (file[f1], O_RDONLY, 0));
290      if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
291	{
292	  if (file_desc[f1] < 0 && comparison_type == type_status)
293	    exit (EXIT_TROUBLE);
294	  else
295	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
296	}
297
298      set_binary_mode (file_desc[f1], 1);
299    }
300
301  /* If the files are links to the same inode and have the same file position,
302     they are identical.  */
303
304#ifdef __APPLE__
305	// conformance tests expect cmp to access the file
306	if(!COMPAT_MODE("bin/cmp", "unix2003"))
307#endif
308  if (0 < same_file (&stat_buf[0], &stat_buf[1])
309      && same_file_attributes (&stat_buf[0], &stat_buf[1])
310      && file_position (0) == file_position (1))
311    return EXIT_SUCCESS;
312
313  /* If output is redirected to the null device, we may assume `-s'.  */
314
315  if (comparison_type != type_status)
316    {
317      struct stat outstat, nullstat;
318
319      if (fstat (STDOUT_FILENO, &outstat) == 0
320	  && stat (NULL_DEVICE, &nullstat) == 0
321	  && 0 < same_file (&outstat, &nullstat))
322	comparison_type = type_status;
323    }
324
325  /* If only a return code is needed,
326     and if both input descriptors are associated with plain files,
327     conclude that the files differ if they have different sizes
328     and if more bytes will be compared than are in the smaller file.  */
329
330  if (comparison_type == type_status
331      && S_ISREG (stat_buf[0].st_mode)
332      && S_ISREG (stat_buf[1].st_mode))
333    {
334      off_t s0 = stat_buf[0].st_size - file_position (0);
335      off_t s1 = stat_buf[1].st_size - file_position (1);
336      if (s0 < 0)
337	s0 = 0;
338      if (s1 < 0)
339	s1 = 0;
340      if (s0 != s1 && MIN (s0, s1) < bytes)
341	exit (EXIT_FAILURE);
342    }
343
344  /* Get the optimal block size of the files.  */
345
346  buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
347			 STAT_BLOCKSIZE (stat_buf[1]),
348			 PTRDIFF_MAX - sizeof (word));
349
350  /* Allocate word-aligned buffers, with space for sentinels at the end.  */
351
352  words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
353  buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
354  buffer[1] = buffer[0] + words_per_buffer;
355
356  exit_status = cmp ();
357
358  for (f = 0; f < 2; f++)
359    if (close (file_desc[f]) != 0)
360      error (EXIT_TROUBLE, errno, "%s", file[f]);
361  if (exit_status != 0  &&  comparison_type != type_status)
362    check_stdout ();
363  exit (exit_status);
364  return exit_status;
365}
366
367/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
368   using `buffer[0]' and `buffer[1]'.
369   Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
370   >1 if error.  */
371
372static int
373cmp (void)
374{
375  off_t line_number = 1;	/* Line number (1...) of difference. */
376  off_t byte_number = 1;	/* Byte number (1...) of difference. */
377  uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
378  size_t read0, read1;		/* Number of bytes read from each file. */
379  size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
380  size_t smaller;		/* The lesser of `read0' and `read1'. */
381  word *buffer0 = buffer[0];
382  word *buffer1 = buffer[1];
383  char *buf0 = (char *) buffer0;
384  char *buf1 = (char *) buffer1;
385  int ret = EXIT_SUCCESS;
386  int f;
387  int offset_width;
388
389  if (comparison_type == type_all_diffs)
390    {
391      off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
392
393      for (f = 0; f < 2; f++)
394	if (S_ISREG (stat_buf[f].st_mode))
395	  {
396	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
397	    if (file_bytes < byte_number_max)
398	      byte_number_max = file_bytes;
399	  }
400
401      for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
402	continue;
403    }
404
405  for (f = 0; f < 2; f++)
406    {
407      off_t ig = ignore_initial[f];
408      if (ig && file_position (f) == -1)
409	{
410	  /* lseek failed; read and discard the ignored initial prefix.  */
411	  do
412	    {
413	      size_t bytes_to_read = MIN (ig, buf_size);
414	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
415	      if (r != bytes_to_read)
416		{
417		  if (r == SIZE_MAX)
418		    error (EXIT_TROUBLE, errno, "%s", file[f]);
419		  break;
420		}
421	      ig -= r;
422	    }
423	  while (ig);
424	}
425    }
426
427  do
428    {
429      size_t bytes_to_read = buf_size;
430
431      if (remaining != UINTMAX_MAX)
432	{
433	  if (remaining < bytes_to_read)
434	    bytes_to_read = remaining;
435	  remaining -= bytes_to_read;
436	}
437
438      read0 = block_read (file_desc[0], buf0, bytes_to_read);
439      if (read0 == SIZE_MAX)
440	error (EXIT_TROUBLE, errno, "%s", file[0]);
441      read1 = block_read (file_desc[1], buf1, bytes_to_read);
442      if (read1 == SIZE_MAX)
443	error (EXIT_TROUBLE, errno, "%s", file[1]);
444
445      /* Insert sentinels for the block compare.  */
446
447      buf0[read0] = ~buf1[read0];
448      buf1[read1] = ~buf0[read1];
449
450      /* If the line number should be written for differing files,
451	 compare the blocks and count the number of newlines
452	 simultaneously.  */
453      first_diff = (comparison_type == type_first_diff
454		    ? block_compare_and_count (buffer0, buffer1, &line_number)
455		    : block_compare (buffer0, buffer1));
456
457      byte_number += first_diff;
458      smaller = MIN (read0, read1);
459
460      if (first_diff < smaller)
461	{
462	  switch (comparison_type)
463	    {
464	    case type_first_diff:
465	      {
466		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
467		char line_buf[INT_BUFSIZE_BOUND (off_t)];
468		char const *byte_num = offtostr (byte_number, byte_buf);
469		char const *line_num = offtostr (line_number, line_buf);
470		if (!opt_print_bytes)
471		  {
472		    /* See POSIX 1003.1-2001 for this format.  This
473		       message is used only in the POSIX locale, so it
474		       need not be translated.  */
475		    static char const char_message[] =
476		      "%s %s differ: char %s, line %s\n";
477
478		    /* The POSIX rationale recommends using the word
479		       "byte" outside the POSIX locale.  Some gettext
480		       implementations translate even in the POSIX
481		       locale if certain other environment variables
482		       are set, so use "byte" if a translation is
483		       available, or if outside the POSIX locale.  */
484		    static char const byte_msgid[] =
485		      N_("%s %s differ: byte %s, line %s\n");
486		    char const *byte_message = _(byte_msgid);
487		    bool use_byte_message = (byte_message != byte_msgid
488					     || hard_locale_LC_MESSAGES);
489
490		    printf ((use_byte_message
491			     ? byte_message
492			     : "%s %s differ: char %s, line %s\n"),
493			    file[0], file[1], byte_num, line_num);
494		  }
495		else
496		  {
497		    unsigned char c0 = buf0[first_diff];
498		    unsigned char c1 = buf1[first_diff];
499		    char s0[5];
500		    char s1[5];
501		    sprintc (s0, c0);
502		    sprintc (s1, c1);
503		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
504			    file[0], file[1], byte_num, line_num,
505			    c0, s0, c1, s1);
506		}
507	      }
508	      /* Fall through.  */
509	    case type_status:
510	      return EXIT_FAILURE;
511
512	    case type_all_diffs:
513	      do
514		{
515		  unsigned char c0 = buf0[first_diff];
516		  unsigned char c1 = buf1[first_diff];
517		  if (c0 != c1)
518		    {
519		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
520		      char const *byte_num = offtostr (byte_number, byte_buf);
521		      if (!opt_print_bytes)
522			{
523			  /* See POSIX 1003.1-2001 for this format.  */
524			  printf ("%*s %3o %3o\n",
525				  offset_width, byte_num, c0, c1);
526			}
527		      else
528			{
529			  char s0[5];
530			  char s1[5];
531			  sprintc (s0, c0);
532			  sprintc (s1, c1);
533			  printf ("%*s %3o %-4s %3o %s\n",
534				  offset_width, byte_num, c0, s0, c1, s1);
535			}
536		    }
537		  byte_number++;
538		  first_diff++;
539		}
540	      while (first_diff < smaller);
541	      ret = EXIT_FAILURE;
542	      break;
543	    }
544	}
545
546      if (read0 != read1)
547	{
548	  if (comparison_type != type_status)
549	    {
550	      /* See POSIX 1003.1-2001 for this format.  */
551	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
552	    }
553
554	  return EXIT_FAILURE;
555	}
556    }
557  while (read0 == buf_size);
558
559  return ret;
560}
561
562/* Compare two blocks of memory P0 and P1 until they differ,
563   and count the number of '\n' occurrences in the common
564   part of P0 and P1.
565   If the blocks are not guaranteed to be different, put sentinels at the ends
566   of the blocks before calling this function.
567
568   Return the offset of the first byte that differs.
569   Increment *COUNT by the count of '\n' occurrences.  */
570
571static size_t
572block_compare_and_count (word const *p0, word const *p1, off_t *count)
573{
574  word l;		/* One word from first buffer. */
575  word const *l0, *l1;	/* Pointers into each buffer. */
576  char const *c0, *c1;	/* Pointers for finding exact address. */
577  size_t cnt = 0;	/* Number of '\n' occurrences. */
578  word nnnn;		/* Newline, sizeof (word) times.  */
579  int i;
580
581  nnnn = 0;
582  for (i = 0; i < sizeof nnnn; i++)
583    nnnn = (nnnn << CHAR_BIT) | '\n';
584
585  /* Find the rough position of the first difference by reading words,
586     not bytes.  */
587
588  for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
589    {
590      l ^= nnnn;
591      for (i = 0; i < sizeof l; i++)
592	{
593	  cnt += ! (unsigned char) l;
594	  l >>= CHAR_BIT;
595	}
596    }
597
598  /* Find the exact differing position (endianness independent).  */
599
600  for (c0 = (char const *) l0, c1 = (char const *) l1;
601       *c0 == *c1;
602       c0++, c1++)
603    cnt += *c0 == '\n';
604
605  *count += cnt;
606  return c0 - (char const *) p0;
607}
608
609/* Compare two blocks of memory P0 and P1 until they differ.
610   If the blocks are not guaranteed to be different, put sentinels at the ends
611   of the blocks before calling this function.
612
613   Return the offset of the first byte that differs.  */
614
615static size_t
616block_compare (word const *p0, word const *p1)
617{
618  word const *l0, *l1;
619  char const *c0, *c1;
620
621  /* Find the rough position of the first difference by reading words,
622     not bytes.  */
623
624  for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
625    continue;
626
627  /* Find the exact differing position (endianness independent).  */
628
629  for (c0 = (char const *) l0, c1 = (char const *) l1;
630       *c0 == *c1;
631       c0++, c1++)
632    continue;
633
634  return c0 - (char const *) p0;
635}
636
/* Store in BUF a null-terminated, visible rendering of the byte C, in
   the manner of cat -t: a byte accepted by ISPRINT is copied as is;
   otherwise a byte of 128 or more gets the prefix "M-" and is reduced
   by 128, after which a value below 32 is written as '^' followed by
   the value plus 64, and 127 is written as "^?".  The longest result
   is four characters plus the terminating null byte.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (ISPRINT (c))
    {
      out[0] = c;
      out[1] = 0;
      return;
    }

  if (c >= 128)
    {
      *out++ = 'M';
      *out++ = '-';
      c -= 128;
    }

  if (c < 32)
    {
      *out++ = '^';
      c += 64;
    }
  else if (c == 127)
    {
      *out++ = '^';
      c = '?';
    }

  *out++ = c;
  *out = 0;
}
666
667/* Position file F to ignore_initial[F] bytes from its initial position,
668   and yield its new position.  Don't try more than once.  */
669
670static off_t
671file_position (int f)
672{
673  static bool positioned[2];
674  static off_t position[2];
675
676  if (! positioned[f])
677    {
678      positioned[f] = 1;
679      position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
680    }
681  return position[f];
682}
683