1/*	$NetBSD: cmp.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $	*/
2
3/* cmp - compare two files byte by byte
4
5   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
6   2002 Free Software Foundation, Inc.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16   See the GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#include "system.h"
24
25#include <stdio.h>
26#include <cmpbuf.h>
27#include <c-stack.h>
28#include <error.h>
29#include <exitfail.h>
30#include <freesoft.h>
31#include <getopt.h>
32#include <hard-locale.h>
33#include <inttostr.h>
34#include <setmode.h>
35#include <xalloc.h>
36#include <xstrtol.h>
37
38#if defined LC_MESSAGES && ENABLE_NLS
39# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
40#else
41# define hard_locale_LC_MESSAGES 0
42#endif
43
44static char const authorship_msgid[] =
45  N_("Written by Torbjorn Granlund and David MacKenzie.");
46
47static char const copyright_string[] =
48  "Copyright (C) 2002 Free Software Foundation, Inc.";
49
50extern char const version_string[];
51
52static int cmp (void);
53static off_t file_position (int);
54static size_t block_compare (word const *, word const *);
55static size_t block_compare_and_count (word const *, word const *, off_t *);
56static void sprintc (char *, unsigned char);
57
58/* Name under which this program was invoked.  */
59char *program_name;
60
61/* Filenames of the compared files.  */
62static char const *file[2];
63
64/* File descriptors of the files.  */
65static int file_desc[2];
66
67/* Status of the files.  */
68static struct stat stat_buf[2];
69
70/* Read buffers for the files.  */
71static word *buffer[2];
72
73/* Optimal block size for the files.  */
74static size_t buf_size;
75
76/* Initial prefix to ignore for each file.  */
77static off_t ignore_initial[2];
78
79/* Number of bytes to compare.  */
80static uintmax_t bytes = UINTMAX_MAX;
81
82/* Output format.  */
83static enum comparison_type
84  {
85    type_first_diff,	/* Print the first difference.  */
86    type_all_diffs,	/* Print all differences.  */
87    type_status		/* Exit status only.  */
88  } comparison_type;
89
90/* If nonzero, print values of bytes quoted like cat -t does. */
91static bool opt_print_bytes;
92
/* Codes returned by getopt_long for long options that have no
   single-letter equivalent.  They start above CHAR_MAX so they cannot
   collide with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long; each entry maps a long
   name onto the value that main's option switch dispatches on.  */
static struct option const long_options[] =
{
  {"print-bytes",    no_argument,       NULL, 'b'},
  {"print-chars",    no_argument,       NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose",        no_argument,       NULL, 'l'},
  {"bytes",          required_argument, NULL, 'n'},
  {"silent",         no_argument,       NULL, 's'},
  {"quiet",          no_argument,       NULL, 's'},
  {"version",        no_argument,       NULL, 'v'},
  {"help",           no_argument,       NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};
112
113static void try_help (char const *, char const *) __attribute__((noreturn));
114static void
115try_help (char const *reason_msgid, char const *operand)
116{
117  if (reason_msgid)
118    error (0, 0, _(reason_msgid), operand);
119  error (EXIT_TROUBLE, 0,
120	 _("Try `%s --help' for more information."), program_name);
121  abort ();
122}
123
124static char const valid_suffixes[] = "kKMGTPEZY0";
125
126/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127   point after the operand.  If DELIMITER is nonzero, the operand may
128   be followed by DELIMITER; otherwise it must be null-terminated.  */
129static off_t
130parse_ignore_initial (char **argptr, char delimiter)
131{
132  uintmax_t val;
133  off_t o;
134  char const *arg = *argptr;
135  strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136  if (! (e == LONGINT_OK
137	 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138      || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139    try_help ("invalid --ignore-initial value `%s'", arg);
140  return o;
141}
142
143/* Specify the output format.  */
144static void
145specify_comparison_type (enum comparison_type t)
146{
147  if (comparison_type)
148    try_help ("options -l and -s are incompatible", 0);
149  comparison_type = t;
150}
151
152static void
153check_stdout (void)
154{
155  if (ferror (stdout))
156    error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157  else if (fclose (stdout) != 0)
158    error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159}
160
161static char const * const option_help_msgid[] = {
162  N_("-b  --print-bytes  Print differing bytes."),
163  N_("-i SKIP  --ignore-initial=SKIP  Skip the first SKIP bytes of input."),
164  N_("-i SKIP1:SKIP2  --ignore-initial=SKIP1:SKIP2"),
165  N_("  Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166  N_("-l  --verbose  Output byte numbers and values of all differing bytes."),
167  N_("-n LIMIT  --bytes=LIMIT  Compare at most LIMIT bytes."),
168  N_("-s  --quiet  --silent  Output nothing; yield exit status only."),
169  N_("-v  --version  Output version info."),
170  N_("--help  Output this help."),
171  0
172};
173
174static void
175usage (void)
176{
177  char const * const *p;
178
179  printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180	  program_name);
181  printf ("%s\n\n", _("Compare two files byte by byte."));
182  for (p = option_help_msgid;  *p;  p++)
183    printf ("  %s\n", _(*p));
184  printf ("\n%s\n%s\n\n%s\n\n%s\n",
185	  _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186	  _("SKIP values may be followed by the following multiplicative suffixes:\n\
187kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189	  _("If a FILE is `-' or missing, read standard input."),
190	  _("Report bugs to <bug-gnu-utils@gnu.org>."));
191}
192
193int
194main (int argc, char **argv)
195{
196  int c, f, exit_status;
197  size_t words_per_buffer;
198
199  exit_failure = EXIT_TROUBLE;
200  initialize_main (&argc, &argv);
201  program_name = argv[0];
202  setlocale (LC_ALL, "");
203  bindtextdomain (PACKAGE, LOCALEDIR);
204  textdomain (PACKAGE);
205  c_stack_action (c_stack_die);
206
207  /* Parse command line options.  */
208
209  while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
210	 != -1)
211    switch (c)
212      {
213      case 'b':
214      case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
215	opt_print_bytes = 1;
216	break;
217
218      case 'i':
219	ignore_initial[0] = parse_ignore_initial (&optarg, ':');
220	ignore_initial[1] = (*optarg++ == ':'
221			     ? parse_ignore_initial (&optarg, 0)
222			     : ignore_initial[0]);
223	break;
224
225      case 'l':
226	specify_comparison_type (type_all_diffs);
227	break;
228
229      case 'n':
230	{
231	  uintmax_t n;
232	  if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
233	    try_help ("invalid --bytes value `%s'", optarg);
234	  if (n < bytes)
235	    bytes = n;
236	}
237	break;
238
239      case 's':
240	specify_comparison_type (type_status);
241	break;
242
243      case 'v':
244	printf ("cmp %s\n%s\n\n%s\n\n%s\n",
245		version_string, copyright_string,
246		_(free_software_msgid), _(authorship_msgid));
247	check_stdout ();
248	return EXIT_SUCCESS;
249
250      case HELP_OPTION:
251	usage ();
252	check_stdout ();
253	return EXIT_SUCCESS;
254
255      default:
256	try_help (0, 0);
257      }
258
259  if (optind == argc)
260    try_help ("missing operand after `%s'", argv[argc - 1]);
261
262  file[0] = argv[optind++];
263  file[1] = optind < argc ? argv[optind++] : "-";
264
265  for (f = 0; f < 2 && optind < argc; f++)
266    {
267      char *arg = argv[optind++];
268      ignore_initial[f] = parse_ignore_initial (&arg, 0);
269    }
270
271  if (optind < argc)
272    try_help ("extra operand `%s'", argv[optind]);
273
274  for (f = 0; f < 2; f++)
275    {
276      /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
277	 stdin is closed and opening file[0] yields file descriptor 0.  */
278      int f1 = f ^ (strcmp (file[1], "-") == 0);
279
280      /* Two files with the same name are identical.
281	 But wait until we open the file once, for proper diagnostics.  */
282      if (f && file_name_cmp (file[0], file[1]) == 0)
283	return EXIT_SUCCESS;
284
285      file_desc[f1] = (strcmp (file[f1], "-") == 0
286		       ? STDIN_FILENO
287		       : open (file[f1], O_RDONLY, 0));
288      if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
289	{
290	  if (file_desc[f1] < 0 && comparison_type == type_status)
291	    exit (EXIT_TROUBLE);
292	  else
293	    error (EXIT_TROUBLE, errno, "%s", file[f1]);
294	}
295
296      set_binary_mode (file_desc[f1], 1);
297    }
298
299  /* If the files are links to the same inode and have the same file position,
300     they are identical.  */
301
302  if (0 < same_file (&stat_buf[0], &stat_buf[1])
303      && same_file_attributes (&stat_buf[0], &stat_buf[1])
304      && file_position (0) == file_position (1))
305    return EXIT_SUCCESS;
306
307  /* If output is redirected to the null device, we may assume `-s'.  */
308
309  if (comparison_type != type_status)
310    {
311      struct stat outstat, nullstat;
312
313      if (fstat (STDOUT_FILENO, &outstat) == 0
314	  && stat (NULL_DEVICE, &nullstat) == 0
315	  && 0 < same_file (&outstat, &nullstat))
316	comparison_type = type_status;
317    }
318
319  /* If only a return code is needed,
320     and if both input descriptors are associated with plain files,
321     conclude that the files differ if they have different sizes
322     and if more bytes will be compared than are in the smaller file.  */
323
324  if (comparison_type == type_status
325      && S_ISREG (stat_buf[0].st_mode)
326      && S_ISREG (stat_buf[1].st_mode))
327    {
328      off_t s0 = stat_buf[0].st_size - file_position (0);
329      off_t s1 = stat_buf[1].st_size - file_position (1);
330      if (s0 < 0)
331	s0 = 0;
332      if (s1 < 0)
333	s1 = 0;
334      if (s0 != s1 && MIN (s0, s1) < bytes)
335	exit (EXIT_FAILURE);
336    }
337
338  /* Get the optimal block size of the files.  */
339
340  buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
341			 STAT_BLOCKSIZE (stat_buf[1]),
342			 PTRDIFF_MAX - sizeof (word));
343
344  /* Allocate word-aligned buffers, with space for sentinels at the end.  */
345
346  words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
347  buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
348  buffer[1] = buffer[0] + words_per_buffer;
349
350  exit_status = cmp ();
351
352  for (f = 0; f < 2; f++)
353    if (close (file_desc[f]) != 0)
354      error (EXIT_TROUBLE, errno, "%s", file[f]);
355  if (exit_status != 0  &&  comparison_type != type_status)
356    check_stdout ();
357  exit (exit_status);
358  return exit_status;
359}
360
361/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
362   using `buffer[0]' and `buffer[1]'.
363   Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
364   >1 if error.  */
365
366static int
367cmp (void)
368{
369  off_t line_number = 1;	/* Line number (1...) of difference. */
370  off_t byte_number = 1;	/* Byte number (1...) of difference. */
371  uintmax_t remaining = bytes;	/* Remaining number of bytes to compare.  */
372  size_t read0, read1;		/* Number of bytes read from each file. */
373  size_t first_diff;		/* Offset (0...) in buffers of 1st diff. */
374  size_t smaller;		/* The lesser of `read0' and `read1'. */
375  word *buffer0 = buffer[0];
376  word *buffer1 = buffer[1];
377  char *buf0 = (char *) buffer0;
378  char *buf1 = (char *) buffer1;
379  int ret = EXIT_SUCCESS;
380  int f;
381  int offset_width;
382
383  if (comparison_type == type_all_diffs)
384    {
385      off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
386
387      for (f = 0; f < 2; f++)
388	if (S_ISREG (stat_buf[f].st_mode))
389	  {
390	    off_t file_bytes = stat_buf[f].st_size - file_position (f);
391	    if (file_bytes < byte_number_max)
392	      byte_number_max = file_bytes;
393	  }
394
395      for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
396	continue;
397    }
398
399  for (f = 0; f < 2; f++)
400    {
401      off_t ig = ignore_initial[f];
402      if (ig && file_position (f) == -1)
403	{
404	  /* lseek failed; read and discard the ignored initial prefix.  */
405	  do
406	    {
407	      size_t bytes_to_read = MIN (ig, buf_size);
408	      size_t r = block_read (file_desc[f], buf0, bytes_to_read);
409	      if (r != bytes_to_read)
410		{
411		  if (r == SIZE_MAX)
412		    error (EXIT_TROUBLE, errno, "%s", file[f]);
413		  break;
414		}
415	      ig -= r;
416	    }
417	  while (ig);
418	}
419    }
420
421  do
422    {
423      size_t bytes_to_read = buf_size;
424
425      if (remaining != UINTMAX_MAX)
426	{
427	  if (remaining < bytes_to_read)
428	    bytes_to_read = remaining;
429	  remaining -= bytes_to_read;
430	}
431
432      read0 = block_read (file_desc[0], buf0, bytes_to_read);
433      if (read0 == SIZE_MAX)
434	error (EXIT_TROUBLE, errno, "%s", file[0]);
435      read1 = block_read (file_desc[1], buf1, bytes_to_read);
436      if (read1 == SIZE_MAX)
437	error (EXIT_TROUBLE, errno, "%s", file[1]);
438
439      /* Insert sentinels for the block compare.  */
440
441      buf0[read0] = ~buf1[read0];
442      buf1[read1] = ~buf0[read1];
443
444      /* If the line number should be written for differing files,
445	 compare the blocks and count the number of newlines
446	 simultaneously.  */
447      first_diff = (comparison_type == type_first_diff
448		    ? block_compare_and_count (buffer0, buffer1, &line_number)
449		    : block_compare (buffer0, buffer1));
450
451      byte_number += first_diff;
452      smaller = MIN (read0, read1);
453
454      if (first_diff < smaller)
455	{
456	  switch (comparison_type)
457	    {
458	    case type_first_diff:
459	      {
460		char byte_buf[INT_BUFSIZE_BOUND (off_t)];
461		char line_buf[INT_BUFSIZE_BOUND (off_t)];
462		char const *byte_num = offtostr (byte_number, byte_buf);
463		char const *line_num = offtostr (line_number, line_buf);
464		if (!opt_print_bytes)
465		  {
466		    /* See POSIX 1003.1-2001 for this format.  This
467		       message is used only in the POSIX locale, so it
468		       need not be translated.  */
469		    static char const char_message[] =
470		      "%s %s differ: char %s, line %s\n";
471
472		    /* The POSIX rationale recommends using the word
473		       "byte" outside the POSIX locale.  Some gettext
474		       implementations translate even in the POSIX
475		       locale if certain other environment variables
476		       are set, so use "byte" if a translation is
477		       available, or if outside the POSIX locale.  */
478		    static char const byte_msgid[] =
479		      N_("%s %s differ: byte %s, line %s\n");
480		    char const *byte_message = _(byte_msgid);
481		    bool use_byte_message = (byte_message != byte_msgid
482					     || hard_locale_LC_MESSAGES);
483
484		    printf ((use_byte_message
485			     ? byte_message
486			     : "%s %s differ: char %s, line %s\n"),
487			    file[0], file[1], byte_num, line_num);
488		  }
489		else
490		  {
491		    unsigned char c0 = buf0[first_diff];
492		    unsigned char c1 = buf1[first_diff];
493		    char s0[5];
494		    char s1[5];
495		    sprintc (s0, c0);
496		    sprintc (s1, c1);
497		    printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498			    file[0], file[1], byte_num, line_num,
499			    c0, s0, c1, s1);
500		}
501	      }
502	      /* Fall through.  */
503	    case type_status:
504	      return EXIT_FAILURE;
505
506	    case type_all_diffs:
507	      do
508		{
509		  unsigned char c0 = buf0[first_diff];
510		  unsigned char c1 = buf1[first_diff];
511		  if (c0 != c1)
512		    {
513		      char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514		      char const *byte_num = offtostr (byte_number, byte_buf);
515		      if (!opt_print_bytes)
516			{
517			  /* See POSIX 1003.1-2001 for this format.  */
518			  printf ("%*s %3o %3o\n",
519				  offset_width, byte_num, c0, c1);
520			}
521		      else
522			{
523			  char s0[5];
524			  char s1[5];
525			  sprintc (s0, c0);
526			  sprintc (s1, c1);
527			  printf ("%*s %3o %-4s %3o %s\n",
528				  offset_width, byte_num, c0, s0, c1, s1);
529			}
530		    }
531		  byte_number++;
532		  first_diff++;
533		}
534	      while (first_diff < smaller);
535	      ret = EXIT_FAILURE;
536	      break;
537	    }
538	}
539
540      if (read0 != read1)
541	{
542	  if (comparison_type != type_status)
543	    {
544	      /* See POSIX 1003.1-2001 for this format.  */
545	      fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
546	    }
547
548	  return EXIT_FAILURE;
549	}
550    }
551  while (read0 == buf_size);
552
553  return ret;
554}
555
556/* Compare two blocks of memory P0 and P1 until they differ,
557   and count the number of '\n' occurrences in the common
558   part of P0 and P1.
559   If the blocks are not guaranteed to be different, put sentinels at the ends
560   of the blocks before calling this function.
561
562   Return the offset of the first byte that differs.
563   Increment *COUNT by the count of '\n' occurrences.  */
564
565static size_t
566block_compare_and_count (word const *p0, word const *p1, off_t *count)
567{
568  word l;		/* One word from first buffer. */
569  word const *l0, *l1;	/* Pointers into each buffer. */
570  char const *c0, *c1;	/* Pointers for finding exact address. */
571  size_t cnt = 0;	/* Number of '\n' occurrences. */
572  word nnnn;		/* Newline, sizeof (word) times.  */
573  int i;
574
575  nnnn = 0;
576  for (i = 0; i < sizeof nnnn; i++)
577    nnnn = (nnnn << CHAR_BIT) | '\n';
578
579  /* Find the rough position of the first difference by reading words,
580     not bytes.  */
581
582  for (l0 = p0, l1 = p1;  (l = *l0) == *l1;  l0++, l1++)
583    {
584      l ^= nnnn;
585      for (i = 0; i < sizeof l; i++)
586	{
587	  cnt += ! (unsigned char) l;
588	  l >>= CHAR_BIT;
589	}
590    }
591
592  /* Find the exact differing position (endianness independent).  */
593
594  for (c0 = (char const *) l0, c1 = (char const *) l1;
595       *c0 == *c1;
596       c0++, c1++)
597    cnt += *c0 == '\n';
598
599  *count += cnt;
600  return c0 - (char const *) p0;
601}
602
603/* Compare two blocks of memory P0 and P1 until they differ.
604   If the blocks are not guaranteed to be different, put sentinels at the ends
605   of the blocks before calling this function.
606
607   Return the offset of the first byte that differs.  */
608
609static size_t
610block_compare (word const *p0, word const *p1)
611{
612  word const *l0, *l1;
613  char const *c0, *c1;
614
615  /* Find the rough position of the first difference by reading words,
616     not bytes.  */
617
618  for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
619    continue;
620
621  /* Find the exact differing position (endianness independent).  */
622
623  for (c0 = (char const *) l0, c1 = (char const *) l1;
624       *c0 == *c1;
625       c0++, c1++)
626    continue;
627
628  return c0 - (char const *) p0;
629}
630
/* Store in BUF a null-terminated, visible rendering of byte C, quoting
   unprintable bytes the way cat -t does: "M-" marks a byte of 128 or
   more, "^" followed by a letter marks a control character, and "^?"
   stands for 127.  The longest result, such as "M-^?", needs five
   bytes including the terminator.  */

static void
sprintc (char *buf, unsigned char c)
{
  size_t n = 0;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          buf[n++] = 'M';
          buf[n++] = '-';
          c -= 128;
        }

      if (c < 32)
        {
          buf[n++] = '^';
          c += 64;
        }
      else if (c == 127)
        {
          buf[n++] = '^';
          c = '?';
        }
    }

  buf[n++] = c;
  buf[n] = 0;
}
660
661/* Position file F to ignore_initial[F] bytes from its initial position,
662   and yield its new position.  Don't try more than once.  */
663
664static off_t
665file_position (int f)
666{
667  static bool positioned[2];
668  static off_t position[2];
669
670  if (! positioned[f])
671    {
672      positioned[f] = 1;
673      position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
674    }
675  return position[f];
676}
677