1/* cmp - compare two files byte by byte 2 3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001, 4 2002 Free Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 See the GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; see the file COPYING. 18 If not, write to the Free Software Foundation, 19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 20 21#include "system.h" 22 23#include <stdio.h> 24#include <cmpbuf.h> 25#include <c-stack.h> 26#include <error.h> 27#include <exitfail.h> 28#include <freesoft.h> 29#include <getopt.h> 30#include <hard-locale.h> 31#include <inttostr.h> 32#include <setmode.h> 33#include <xalloc.h> 34#include <xstrtol.h> 35 36#if defined LC_MESSAGES && ENABLE_NLS 37# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES) 38#else 39# define hard_locale_LC_MESSAGES 0 40#endif 41 42#ifdef __APPLE__ 43#include "get_compat.h" 44#endif 45 46static char const authorship_msgid[] = 47 N_("Written by Torbjorn Granlund and David MacKenzie."); 48 49static char const copyright_string[] = 50 "Copyright (C) 2002 Free Software Foundation, Inc."; 51 52extern char const version_string[]; 53 54static int cmp (void); 55static off_t file_position (int); 56static size_t block_compare (word const *, word const *); 57static size_t block_compare_and_count (word const *, word const *, off_t *); 58static void sprintc (char *, unsigned char); 59 60/* Name under which this program was invoked. */ 61char *program_name; 62 63/* Filenames of the compared files. */ 64static char const *file[2]; 65 66/* File descriptors of the files. */ 67static int file_desc[2]; 68 69/* Status of the files. */ 70static struct stat stat_buf[2]; 71 72/* Read buffers for the files. */ 73static word *buffer[2]; 74 75/* Optimal block size for the files. */ 76static size_t buf_size; 77 78/* Initial prefix to ignore for each file. */ 79static off_t ignore_initial[2]; 80 81/* Number of bytes to compare. */ 82static uintmax_t bytes = UINTMAX_MAX; 83 84/* Output format. */ 85static enum comparison_type 86 { 87 type_first_diff, /* Print the first difference. */ 88 type_all_diffs, /* Print all differences. */ 89 type_status /* Exit status only. */ 90 } comparison_type; 91 92/* If nonzero, print values of bytes quoted like cat -t does. */ 93static bool opt_print_bytes; 94 95/* Values for long options that do not have single-letter equivalents. */ 96enum 97{ 98 HELP_OPTION = CHAR_MAX + 1 99}; 100 101static struct option const long_options[] = 102{ 103 {"print-bytes", 0, 0, 'b'}, 104 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */ 105 {"ignore-initial", 1, 0, 'i'}, 106 {"verbose", 0, 0, 'l'}, 107 {"bytes", 1, 0, 'n'}, 108 {"silent", 0, 0, 's'}, 109 {"quiet", 0, 0, 's'}, 110 {"version", 0, 0, 'v'}, 111 {"help", 0, 0, HELP_OPTION}, 112 {0, 0, 0, 0} 113}; 114 115static void try_help (char const *, char const *) __attribute__((noreturn)); 116static void 117try_help (char const *reason_msgid, char const *operand) 118{ 119 if (reason_msgid) 120 error (0, 0, _(reason_msgid), operand); 121 error (EXIT_TROUBLE, 0, 122 _("Try `%s --help' for more information."), program_name); 123 abort (); 124} 125 126static char const valid_suffixes[] = "kKMGTPEZY0"; 127 128/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to 129 point after the operand. If DELIMITER is nonzero, the operand may 130 be followed by DELIMITER; otherwise it must be null-terminated. */ 131static off_t 132parse_ignore_initial (char **argptr, char delimiter) 133{ 134 uintmax_t val; 135 off_t o; 136 char const *arg = *argptr; 137 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes); 138 if (! (e == LONGINT_OK 139 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter)) 140 || (o = val) < 0 || o != val || val == UINTMAX_MAX) 141 try_help ("invalid --ignore-initial value `%s'", arg); 142 return o; 143} 144 145/* Specify the output format. */ 146static void 147specify_comparison_type (enum comparison_type t) 148{ 149 if (comparison_type) 150 try_help ("options -l and -s are incompatible", 0); 151 comparison_type = t; 152} 153 154static void 155check_stdout (void) 156{ 157 if (ferror (stdout)) 158 error (EXIT_TROUBLE, 0, "%s", _("write failed")); 159 else if (fclose (stdout) != 0) 160 error (EXIT_TROUBLE, errno, "%s", _("standard output")); 161} 162 163static char const * const option_help_msgid[] = { 164 N_("-b --print-bytes Print differing bytes."), 165 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."), 166 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"), 167 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."), 168 N_("-l --verbose Output byte numbers and values of all differing bytes."), 169 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."), 170 N_("-s --quiet --silent Output nothing; yield exit status only."), 171 N_("-v --version Output version info."), 172 N_("--help Output this help."), 173 0 174}; 175 176static void 177usage (void) 178{ 179 char const * const *p; 180 181 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"), 182 program_name); 183 printf ("%s\n\n", _("Compare two files byte by byte.")); 184 for (p = option_help_msgid; *p; p++) 185 printf (" %s\n", _(*p)); 186 printf ("\n%s\n%s\n\n%s\n\n%s\n", 187 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."), 188 _("SKIP values may be followed by the following multiplicative suffixes:\n\ 189kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\ 190GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."), 191 _("If a FILE is `-' or missing, read standard input."), 192 _("Report bugs to <bug-gnu-utils@gnu.org>.")); 193} 194 195int 196main (int argc, char **argv) 197{ 198 int c, f, exit_status; 199 size_t words_per_buffer; 200 201 exit_failure = EXIT_TROUBLE; 202 initialize_main (&argc, &argv); 203 program_name = argv[0]; 204 setlocale (LC_ALL, ""); 205 bindtextdomain (PACKAGE, LOCALEDIR); 206 textdomain (PACKAGE); 207 c_stack_action (c_stack_die); 208 209 /* Parse command line options. */ 210 211 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0)) 212 != -1) 213 switch (c) 214 { 215 case 'b': 216 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */ 217 opt_print_bytes = 1; 218 break; 219 220 case 'i': 221 ignore_initial[0] = parse_ignore_initial (&optarg, ':'); 222 ignore_initial[1] = (*optarg++ == ':' 223 ? parse_ignore_initial (&optarg, 0) 224 : ignore_initial[0]); 225 break; 226 227 case 'l': 228 specify_comparison_type (type_all_diffs); 229 break; 230 231 case 'n': 232 { 233 uintmax_t n; 234 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK) 235 try_help ("invalid --bytes value `%s'", optarg); 236 if (n < bytes) 237 bytes = n; 238 } 239 break; 240 241 case 's': 242 specify_comparison_type (type_status); 243 break; 244 245 case 'v': 246 printf ("cmp %s\n%s\n\n%s\n\n%s\n", 247 version_string, copyright_string, 248 _(free_software_msgid), _(authorship_msgid)); 249 check_stdout (); 250 return EXIT_SUCCESS; 251 252 case HELP_OPTION: 253 usage (); 254 check_stdout (); 255 return EXIT_SUCCESS; 256 257 default: 258 try_help (0, 0); 259 } 260 261 if (optind == argc) 262 try_help ("missing operand after `%s'", argv[argc - 1]); 263 264 file[0] = argv[optind++]; 265 file[1] = optind < argc ? argv[optind++] : "-"; 266 267 for (f = 0; f < 2 && optind < argc; f++) 268 { 269 char *arg = argv[optind++]; 270 ignore_initial[f] = parse_ignore_initial (&arg, 0); 271 } 272 273 if (optind < argc) 274 try_help ("extra operand `%s'", argv[optind]); 275 276 for (f = 0; f < 2; f++) 277 { 278 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if 279 stdin is closed and opening file[0] yields file descriptor 0. */ 280 int f1 = f ^ (strcmp (file[1], "-") == 0); 281 282 /* Two files with the same name are identical. 283 But wait until we open the file once, for proper diagnostics. */ 284 if (f && file_name_cmp (file[0], file[1]) == 0) 285 return EXIT_SUCCESS; 286 287 file_desc[f1] = (strcmp (file[f1], "-") == 0 288 ? STDIN_FILENO 289 : open (file[f1], O_RDONLY, 0)); 290 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0) 291 { 292 if (file_desc[f1] < 0 && comparison_type == type_status) 293 exit (EXIT_TROUBLE); 294 else 295 error (EXIT_TROUBLE, errno, "%s", file[f1]); 296 } 297 298 set_binary_mode (file_desc[f1], 1); 299 } 300 301 /* If the files are links to the same inode and have the same file position, 302 they are identical. */ 303 304#ifdef __APPLE__ 305 // conformance tests expect cmp to access the file 306 if(!COMPAT_MODE("bin/cmp", "unix2003")) 307#endif 308 if (0 < same_file (&stat_buf[0], &stat_buf[1]) 309 && same_file_attributes (&stat_buf[0], &stat_buf[1]) 310 && file_position (0) == file_position (1)) 311 return EXIT_SUCCESS; 312 313 /* If output is redirected to the null device, we may assume `-s'. */ 314 315 if (comparison_type != type_status) 316 { 317 struct stat outstat, nullstat; 318 319 if (fstat (STDOUT_FILENO, &outstat) == 0 320 && stat (NULL_DEVICE, &nullstat) == 0 321 && 0 < same_file (&outstat, &nullstat)) 322 comparison_type = type_status; 323 } 324 325 /* If only a return code is needed, 326 and if both input descriptors are associated with plain files, 327 conclude that the files differ if they have different sizes 328 and if more bytes will be compared than are in the smaller file. */ 329 330 if (comparison_type == type_status 331 && S_ISREG (stat_buf[0].st_mode) 332 && S_ISREG (stat_buf[1].st_mode)) 333 { 334 off_t s0 = stat_buf[0].st_size - file_position (0); 335 off_t s1 = stat_buf[1].st_size - file_position (1); 336 if (s0 < 0) 337 s0 = 0; 338 if (s1 < 0) 339 s1 = 0; 340 if (s0 != s1 && MIN (s0, s1) < bytes) 341 exit (EXIT_FAILURE); 342 } 343 344 /* Get the optimal block size of the files. */ 345 346 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]), 347 STAT_BLOCKSIZE (stat_buf[1]), 348 PTRDIFF_MAX - sizeof (word)); 349 350 /* Allocate word-aligned buffers, with space for sentinels at the end. */ 351 352 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word); 353 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer); 354 buffer[1] = buffer[0] + words_per_buffer; 355 356 exit_status = cmp (); 357 358 for (f = 0; f < 2; f++) 359 if (close (file_desc[f]) != 0) 360 error (EXIT_TROUBLE, errno, "%s", file[f]); 361 if (exit_status != 0 && comparison_type != type_status) 362 check_stdout (); 363 exit (exit_status); 364 return exit_status; 365} 366 367/* Compare the two files already open on `file_desc[0]' and `file_desc[1]', 368 using `buffer[0]' and `buffer[1]'. 369 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different, 370 >1 if error. */ 371 372static int 373cmp (void) 374{ 375 off_t line_number = 1; /* Line number (1...) of difference. */ 376 off_t byte_number = 1; /* Byte number (1...) of difference. */ 377 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */ 378 size_t read0, read1; /* Number of bytes read from each file. */ 379 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */ 380 size_t smaller; /* The lesser of `read0' and `read1'. */ 381 word *buffer0 = buffer[0]; 382 word *buffer1 = buffer[1]; 383 char *buf0 = (char *) buffer0; 384 char *buf1 = (char *) buffer1; 385 int ret = EXIT_SUCCESS; 386 int f; 387 int offset_width; 388 389 if (comparison_type == type_all_diffs) 390 { 391 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t)); 392 393 for (f = 0; f < 2; f++) 394 if (S_ISREG (stat_buf[f].st_mode)) 395 { 396 off_t file_bytes = stat_buf[f].st_size - file_position (f); 397 if (file_bytes < byte_number_max) 398 byte_number_max = file_bytes; 399 } 400 401 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++) 402 continue; 403 } 404 405 for (f = 0; f < 2; f++) 406 { 407 off_t ig = ignore_initial[f]; 408 if (ig && file_position (f) == -1) 409 { 410 /* lseek failed; read and discard the ignored initial prefix. */ 411 do 412 { 413 size_t bytes_to_read = MIN (ig, buf_size); 414 size_t r = block_read (file_desc[f], buf0, bytes_to_read); 415 if (r != bytes_to_read) 416 { 417 if (r == SIZE_MAX) 418 error (EXIT_TROUBLE, errno, "%s", file[f]); 419 break; 420 } 421 ig -= r; 422 } 423 while (ig); 424 } 425 } 426 427 do 428 { 429 size_t bytes_to_read = buf_size; 430 431 if (remaining != UINTMAX_MAX) 432 { 433 if (remaining < bytes_to_read) 434 bytes_to_read = remaining; 435 remaining -= bytes_to_read; 436 } 437 438 read0 = block_read (file_desc[0], buf0, bytes_to_read); 439 if (read0 == SIZE_MAX) 440 error (EXIT_TROUBLE, errno, "%s", file[0]); 441 read1 = block_read (file_desc[1], buf1, bytes_to_read); 442 if (read1 == SIZE_MAX) 443 error (EXIT_TROUBLE, errno, "%s", file[1]); 444 445 /* Insert sentinels for the block compare. */ 446 447 buf0[read0] = ~buf1[read0]; 448 buf1[read1] = ~buf0[read1]; 449 450 /* If the line number should be written for differing files, 451 compare the blocks and count the number of newlines 452 simultaneously. */ 453 first_diff = (comparison_type == type_first_diff 454 ? block_compare_and_count (buffer0, buffer1, &line_number) 455 : block_compare (buffer0, buffer1)); 456 457 byte_number += first_diff; 458 smaller = MIN (read0, read1); 459 460 if (first_diff < smaller) 461 { 462 switch (comparison_type) 463 { 464 case type_first_diff: 465 { 466 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 467 char line_buf[INT_BUFSIZE_BOUND (off_t)]; 468 char const *byte_num = offtostr (byte_number, byte_buf); 469 char const *line_num = offtostr (line_number, line_buf); 470 if (!opt_print_bytes) 471 { 472 /* See POSIX 1003.1-2001 for this format. This 473 message is used only in the POSIX locale, so it 474 need not be translated. */ 475 static char const char_message[] = 476 "%s %s differ: char %s, line %s\n"; 477 478 /* The POSIX rationale recommends using the word 479 "byte" outside the POSIX locale. Some gettext 480 implementations translate even in the POSIX 481 locale if certain other environment variables 482 are set, so use "byte" if a translation is 483 available, or if outside the POSIX locale. */ 484 static char const byte_msgid[] = 485 N_("%s %s differ: byte %s, line %s\n"); 486 char const *byte_message = _(byte_msgid); 487 bool use_byte_message = (byte_message != byte_msgid 488 || hard_locale_LC_MESSAGES); 489 490 printf ((use_byte_message 491 ? byte_message 492 : "%s %s differ: char %s, line %s\n"), 493 file[0], file[1], byte_num, line_num); 494 } 495 else 496 { 497 unsigned char c0 = buf0[first_diff]; 498 unsigned char c1 = buf1[first_diff]; 499 char s0[5]; 500 char s1[5]; 501 sprintc (s0, c0); 502 sprintc (s1, c1); 503 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"), 504 file[0], file[1], byte_num, line_num, 505 c0, s0, c1, s1); 506 } 507 } 508 /* Fall through. */ 509 case type_status: 510 return EXIT_FAILURE; 511 512 case type_all_diffs: 513 do 514 { 515 unsigned char c0 = buf0[first_diff]; 516 unsigned char c1 = buf1[first_diff]; 517 if (c0 != c1) 518 { 519 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 520 char const *byte_num = offtostr (byte_number, byte_buf); 521 if (!opt_print_bytes) 522 { 523 /* See POSIX 1003.1-2001 for this format. */ 524 printf ("%*s %3o %3o\n", 525 offset_width, byte_num, c0, c1); 526 } 527 else 528 { 529 char s0[5]; 530 char s1[5]; 531 sprintc (s0, c0); 532 sprintc (s1, c1); 533 printf ("%*s %3o %-4s %3o %s\n", 534 offset_width, byte_num, c0, s0, c1, s1); 535 } 536 } 537 byte_number++; 538 first_diff++; 539 } 540 while (first_diff < smaller); 541 ret = EXIT_FAILURE; 542 break; 543 } 544 } 545 546 if (read0 != read1) 547 { 548 if (comparison_type != type_status) 549 { 550 /* See POSIX 1003.1-2001 for this format. */ 551 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]); 552 } 553 554 return EXIT_FAILURE; 555 } 556 } 557 while (read0 == buf_size); 558 559 return ret; 560} 561 562/* Compare two blocks of memory P0 and P1 until they differ, 563 and count the number of '\n' occurrences in the common 564 part of P0 and P1. 565 If the blocks are not guaranteed to be different, put sentinels at the ends 566 of the blocks before calling this function. 567 568 Return the offset of the first byte that differs. 569 Increment *COUNT by the count of '\n' occurrences. */ 570 571static size_t 572block_compare_and_count (word const *p0, word const *p1, off_t *count) 573{ 574 word l; /* One word from first buffer. */ 575 word const *l0, *l1; /* Pointers into each buffer. */ 576 char const *c0, *c1; /* Pointers for finding exact address. */ 577 size_t cnt = 0; /* Number of '\n' occurrences. */ 578 word nnnn; /* Newline, sizeof (word) times. */ 579 int i; 580 581 nnnn = 0; 582 for (i = 0; i < sizeof nnnn; i++) 583 nnnn = (nnnn << CHAR_BIT) | '\n'; 584 585 /* Find the rough position of the first difference by reading words, 586 not bytes. */ 587 588 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++) 589 { 590 l ^= nnnn; 591 for (i = 0; i < sizeof l; i++) 592 { 593 cnt += ! (unsigned char) l; 594 l >>= CHAR_BIT; 595 } 596 } 597 598 /* Find the exact differing position (endianness independent). */ 599 600 for (c0 = (char const *) l0, c1 = (char const *) l1; 601 *c0 == *c1; 602 c0++, c1++) 603 cnt += *c0 == '\n'; 604 605 *count += cnt; 606 return c0 - (char const *) p0; 607} 608 609/* Compare two blocks of memory P0 and P1 until they differ. 610 If the blocks are not guaranteed to be different, put sentinels at the ends 611 of the blocks before calling this function. 612 613 Return the offset of the first byte that differs. */ 614 615static size_t 616block_compare (word const *p0, word const *p1) 617{ 618 word const *l0, *l1; 619 char const *c0, *c1; 620 621 /* Find the rough position of the first difference by reading words, 622 not bytes. */ 623 624 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++) 625 continue; 626 627 /* Find the exact differing position (endianness independent). */ 628 629 for (c0 = (char const *) l0, c1 = (char const *) l1; 630 *c0 == *c1; 631 c0++, c1++) 632 continue; 633 634 return c0 - (char const *) p0; 635} 636 637/* Put into BUF the unsigned char C, making unprintable bytes 638 visible by quoting like cat -t does. */ 639 640static void 641sprintc (char *buf, unsigned char c) 642{ 643 if (! ISPRINT (c)) 644 { 645 if (c >= 128) 646 { 647 *buf++ = 'M'; 648 *buf++ = '-'; 649 c -= 128; 650 } 651 if (c < 32) 652 { 653 *buf++ = '^'; 654 c += 64; 655 } 656 else if (c == 127) 657 { 658 *buf++ = '^'; 659 c = '?'; 660 } 661 } 662 663 *buf++ = c; 664 *buf = 0; 665} 666 667/* Position file F to ignore_initial[F] bytes from its initial position, 668 and yield its new position. Don't try more than once. */ 669 670static off_t 671file_position (int f) 672{ 673 static bool positioned[2]; 674 static off_t position[2]; 675 676 if (! positioned[f]) 677 { 678 positioned[f] = 1; 679 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR); 680 } 681 return position[f]; 682} 683