1/* cmp - compare two files byte by byte 2 3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001, 4 2002 Free Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 See the GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; see the file COPYING. 18 If not, write to the Free Software Foundation, 19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 20 21#include "system.h" 22 23#include <stdio.h> 24#include <cmpbuf.h> 25#include <c-stack.h> 26#include <error.h> 27#include <exitfail.h> 28#include <freesoft.h> 29#include <getopt.h> 30#include <hard-locale.h> 31#include <inttostr.h> 32#include <setmode.h> 33#include <xalloc.h> 34#include <xstrtol.h> 35 36#if defined LC_MESSAGES && ENABLE_NLS 37# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES) 38#else 39# define hard_locale_LC_MESSAGES 0 40#endif 41 42static char const authorship_msgid[] = 43 N_("Written by Torbjorn Granlund and David MacKenzie."); 44 45static char const copyright_string[] = 46 "Copyright (C) 2002 Free Software Foundation, Inc."; 47 48extern char const version_string[]; 49 50static int cmp (void); 51static off_t file_position (int); 52static size_t block_compare (word const *, word const *); 53static size_t block_compare_and_count (word const *, word const *, off_t *); 54static void sprintc (char *, unsigned char); 55 56/* Name under which this program was invoked. */ 57char *program_name; 58 59/* Filenames of the compared files. */ 60static char const *file[2]; 61 62/* File descriptors of the files. */ 63static int file_desc[2]; 64 65/* Status of the files. */ 66static struct stat stat_buf[2]; 67 68/* Read buffers for the files. */ 69static word *buffer[2]; 70 71/* Optimal block size for the files. */ 72static size_t buf_size; 73 74/* Initial prefix to ignore for each file. */ 75static off_t ignore_initial[2]; 76 77/* Number of bytes to compare. */ 78static uintmax_t bytes = UINTMAX_MAX; 79 80/* Output format. */ 81static enum comparison_type 82 { 83 type_first_diff, /* Print the first difference. */ 84 type_all_diffs, /* Print all differences. */ 85 type_status /* Exit status only. */ 86 } comparison_type; 87 88/* If nonzero, print values of bytes quoted like cat -t does. */ 89static bool opt_print_bytes; 90 91/* Values for long options that do not have single-letter equivalents. */ 92enum 93{ 94 HELP_OPTION = CHAR_MAX + 1 95}; 96 97static struct option const long_options[] = 98{ 99 {"print-bytes", 0, 0, 'b'}, 100 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */ 101 {"ignore-initial", 1, 0, 'i'}, 102 {"verbose", 0, 0, 'l'}, 103 {"bytes", 1, 0, 'n'}, 104 {"silent", 0, 0, 's'}, 105 {"quiet", 0, 0, 's'}, 106 {"version", 0, 0, 'v'}, 107 {"help", 0, 0, HELP_OPTION}, 108 {0, 0, 0, 0} 109}; 110 111static void try_help (char const *, char const *) __attribute__((noreturn)); 112static void 113try_help (char const *reason_msgid, char const *operand) 114{ 115 if (reason_msgid) 116 error (0, 0, _(reason_msgid), operand); 117 error (EXIT_TROUBLE, 0, 118 _("Try `%s --help' for more information."), program_name); 119 abort (); 120} 121 122static char const valid_suffixes[] = "kKMGTPEZY0"; 123 124/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to 125 point after the operand. If DELIMITER is nonzero, the operand may 126 be followed by DELIMITER; otherwise it must be null-terminated. */ 127static off_t 128parse_ignore_initial (char **argptr, char delimiter) 129{ 130 uintmax_t val; 131 off_t o; 132 char const *arg = *argptr; 133 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes); 134 if (! (e == LONGINT_OK 135 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter)) 136 || (o = val) < 0 || o != val || val == UINTMAX_MAX) 137 try_help ("invalid --ignore-initial value `%s'", arg); 138 return o; 139} 140 141/* Specify the output format. */ 142static void 143specify_comparison_type (enum comparison_type t) 144{ 145 if (comparison_type) 146 try_help ("options -l and -s are incompatible", 0); 147 comparison_type = t; 148} 149 150static void 151check_stdout (void) 152{ 153 if (ferror (stdout)) 154 error (EXIT_TROUBLE, 0, "%s", _("write failed")); 155 else if (fclose (stdout) != 0) 156 error (EXIT_TROUBLE, errno, "%s", _("standard output")); 157} 158 159static char const * const option_help_msgid[] = { 160 N_("-b --print-bytes Print differing bytes."), 161 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."), 162 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"), 163 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."), 164 N_("-l --verbose Output byte numbers and values of all differing bytes."), 165 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."), 166 N_("-s --quiet --silent Output nothing; yield exit status only."), 167 N_("-v --version Output version info."), 168 N_("--help Output this help."), 169 0 170}; 171 172static void 173usage (void) 174{ 175 char const * const *p; 176 177 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"), 178 program_name); 179 printf ("%s\n\n", _("Compare two files byte by byte.")); 180 for (p = option_help_msgid; *p; p++) 181 printf (" %s\n", _(*p)); 182 printf ("\n%s\n%s\n\n%s\n\n%s\n", 183 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."), 184 _("SKIP values may be followed by the following multiplicative suffixes:\n\ 185kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\ 186GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."), 187 _("If a FILE is `-' or missing, read standard input."), 188 _("Report bugs to <bug-gnu-utils@gnu.org>.")); 189} 190 191int 192main (int argc, char **argv) 193{ 194 int c, f, exit_status; 195 size_t words_per_buffer; 196 197 exit_failure = EXIT_TROUBLE; 198 initialize_main (&argc, &argv); 199 program_name = argv[0]; 200 setlocale (LC_ALL, ""); 201 bindtextdomain (PACKAGE, LOCALEDIR); 202 textdomain (PACKAGE); 203 c_stack_action (c_stack_die); 204 205 /* Parse command line options. */ 206 207 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0)) 208 != -1) 209 switch (c) 210 { 211 case 'b': 212 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */ 213 opt_print_bytes = 1; 214 break; 215 216 case 'i': 217 ignore_initial[0] = parse_ignore_initial (&optarg, ':'); 218 ignore_initial[1] = (*optarg++ == ':' 219 ? parse_ignore_initial (&optarg, 0) 220 : ignore_initial[0]); 221 break; 222 223 case 'l': 224 specify_comparison_type (type_all_diffs); 225 break; 226 227 case 'n': 228 { 229 uintmax_t n; 230 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK) 231 try_help ("invalid --bytes value `%s'", optarg); 232 if (n < bytes) 233 bytes = n; 234 } 235 break; 236 237 case 's': 238 specify_comparison_type (type_status); 239 break; 240 241 case 'v': 242 printf ("cmp %s\n%s\n\n%s\n\n%s\n", 243 version_string, copyright_string, 244 _(free_software_msgid), _(authorship_msgid)); 245 check_stdout (); 246 return EXIT_SUCCESS; 247 248 case HELP_OPTION: 249 usage (); 250 check_stdout (); 251 return EXIT_SUCCESS; 252 253 default: 254 try_help (0, 0); 255 } 256 257 if (optind == argc) 258 try_help ("missing operand after `%s'", argv[argc - 1]); 259 260 file[0] = argv[optind++]; 261 file[1] = optind < argc ? argv[optind++] : "-"; 262 263 for (f = 0; f < 2 && optind < argc; f++) 264 { 265 char *arg = argv[optind++]; 266 ignore_initial[f] = parse_ignore_initial (&arg, 0); 267 } 268 269 if (optind < argc) 270 try_help ("extra operand `%s'", argv[optind]); 271 272 for (f = 0; f < 2; f++) 273 { 274 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if 275 stdin is closed and opening file[0] yields file descriptor 0. */ 276 int f1 = f ^ (strcmp (file[1], "-") == 0); 277 278 /* Two files with the same name are identical. 279 But wait until we open the file once, for proper diagnostics. */ 280 if (f && file_name_cmp (file[0], file[1]) == 0) 281 return EXIT_SUCCESS; 282 283 file_desc[f1] = (strcmp (file[f1], "-") == 0 284 ? STDIN_FILENO 285 : open (file[f1], O_RDONLY, 0)); 286 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0) 287 { 288 if (file_desc[f1] < 0 && comparison_type == type_status) 289 exit (EXIT_TROUBLE); 290 else 291 error (EXIT_TROUBLE, errno, "%s", file[f1]); 292 } 293 294 set_binary_mode (file_desc[f1], 1); 295 } 296 297 /* If the files are links to the same inode and have the same file position, 298 they are identical. */ 299 300 if (0 < same_file (&stat_buf[0], &stat_buf[1]) 301 && same_file_attributes (&stat_buf[0], &stat_buf[1]) 302 && file_position (0) == file_position (1)) 303 return EXIT_SUCCESS; 304 305 /* If output is redirected to the null device, we may assume `-s'. */ 306 307 if (comparison_type != type_status) 308 { 309 struct stat outstat, nullstat; 310 311 if (fstat (STDOUT_FILENO, &outstat) == 0 312 && stat (NULL_DEVICE, &nullstat) == 0 313 && 0 < same_file (&outstat, &nullstat)) 314 comparison_type = type_status; 315 } 316 317 /* If only a return code is needed, 318 and if both input descriptors are associated with plain files, 319 conclude that the files differ if they have different sizes 320 and if more bytes will be compared than are in the smaller file. */ 321 322 if (comparison_type == type_status 323 && S_ISREG (stat_buf[0].st_mode) 324 && S_ISREG (stat_buf[1].st_mode)) 325 { 326 off_t s0 = stat_buf[0].st_size - file_position (0); 327 off_t s1 = stat_buf[1].st_size - file_position (1); 328 if (s0 < 0) 329 s0 = 0; 330 if (s1 < 0) 331 s1 = 0; 332 if (s0 != s1 && MIN (s0, s1) < bytes) 333 exit (EXIT_FAILURE); 334 } 335 336 /* Get the optimal block size of the files. */ 337 338 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]), 339 STAT_BLOCKSIZE (stat_buf[1]), 340 PTRDIFF_MAX - sizeof (word)); 341 342 /* Allocate word-aligned buffers, with space for sentinels at the end. */ 343 344 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word); 345 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer); 346 buffer[1] = buffer[0] + words_per_buffer; 347 348 exit_status = cmp (); 349 350 for (f = 0; f < 2; f++) 351 if (close (file_desc[f]) != 0) 352 error (EXIT_TROUBLE, errno, "%s", file[f]); 353 if (exit_status != 0 && comparison_type != type_status) 354 check_stdout (); 355 exit (exit_status); 356 return exit_status; 357} 358 359/* Compare the two files already open on `file_desc[0]' and `file_desc[1]', 360 using `buffer[0]' and `buffer[1]'. 361 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different, 362 >1 if error. */ 363 364static int 365cmp (void) 366{ 367 off_t line_number = 1; /* Line number (1...) of difference. */ 368 off_t byte_number = 1; /* Byte number (1...) of difference. */ 369 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */ 370 size_t read0, read1; /* Number of bytes read from each file. */ 371 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */ 372 size_t smaller; /* The lesser of `read0' and `read1'. */ 373 word *buffer0 = buffer[0]; 374 word *buffer1 = buffer[1]; 375 char *buf0 = (char *) buffer0; 376 char *buf1 = (char *) buffer1; 377 int ret = EXIT_SUCCESS; 378 int f; 379 int offset_width; 380 381 if (comparison_type == type_all_diffs) 382 { 383 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t)); 384 385 for (f = 0; f < 2; f++) 386 if (S_ISREG (stat_buf[f].st_mode)) 387 { 388 off_t file_bytes = stat_buf[f].st_size - file_position (f); 389 if (file_bytes < byte_number_max) 390 byte_number_max = file_bytes; 391 } 392 393 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++) 394 continue; 395 } 396 397 for (f = 0; f < 2; f++) 398 { 399 off_t ig = ignore_initial[f]; 400 if (ig && file_position (f) == -1) 401 { 402 /* lseek failed; read and discard the ignored initial prefix. */ 403 do 404 { 405 size_t bytes_to_read = MIN (ig, buf_size); 406 size_t r = block_read (file_desc[f], buf0, bytes_to_read); 407 if (r != bytes_to_read) 408 { 409 if (r == SIZE_MAX) 410 error (EXIT_TROUBLE, errno, "%s", file[f]); 411 break; 412 } 413 ig -= r; 414 } 415 while (ig); 416 } 417 } 418 419 do 420 { 421 size_t bytes_to_read = buf_size; 422 423 if (remaining != UINTMAX_MAX) 424 { 425 if (remaining < bytes_to_read) 426 bytes_to_read = remaining; 427 remaining -= bytes_to_read; 428 } 429 430 read0 = block_read (file_desc[0], buf0, bytes_to_read); 431 if (read0 == SIZE_MAX) 432 error (EXIT_TROUBLE, errno, "%s", file[0]); 433 read1 = block_read (file_desc[1], buf1, bytes_to_read); 434 if (read1 == SIZE_MAX) 435 error (EXIT_TROUBLE, errno, "%s", file[1]); 436 437 /* Insert sentinels for the block compare. */ 438 439 buf0[read0] = ~buf1[read0]; 440 buf1[read1] = ~buf0[read1]; 441 442 /* If the line number should be written for differing files, 443 compare the blocks and count the number of newlines 444 simultaneously. */ 445 first_diff = (comparison_type == type_first_diff 446 ? block_compare_and_count (buffer0, buffer1, &line_number) 447 : block_compare (buffer0, buffer1)); 448 449 byte_number += first_diff; 450 smaller = MIN (read0, read1); 451 452 if (first_diff < smaller) 453 { 454 switch (comparison_type) 455 { 456 case type_first_diff: 457 { 458 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 459 char line_buf[INT_BUFSIZE_BOUND (off_t)]; 460 char const *byte_num = offtostr (byte_number, byte_buf); 461 char const *line_num = offtostr (line_number, line_buf); 462 if (!opt_print_bytes) 463 { 464 /* See POSIX 1003.1-2001 for this format. This 465 message is used only in the POSIX locale, so it 466 need not be translated. */ 467 static char const char_message[] = 468 "%s %s differ: char %s, line %s\n"; 469 470 /* The POSIX rationale recommends using the word 471 "byte" outside the POSIX locale. Some gettext 472 implementations translate even in the POSIX 473 locale if certain other environment variables 474 are set, so use "byte" if a translation is 475 available, or if outside the POSIX locale. */ 476 static char const byte_msgid[] = 477 N_("%s %s differ: byte %s, line %s\n"); 478 char const *byte_message = _(byte_msgid); 479 bool use_byte_message = (byte_message != byte_msgid 480 || hard_locale_LC_MESSAGES); 481 482 printf ((use_byte_message 483 ? byte_message 484 : "%s %s differ: char %s, line %s\n"), 485 file[0], file[1], byte_num, line_num); 486 } 487 else 488 { 489 unsigned char c0 = buf0[first_diff]; 490 unsigned char c1 = buf1[first_diff]; 491 char s0[5]; 492 char s1[5]; 493 sprintc (s0, c0); 494 sprintc (s1, c1); 495 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"), 496 file[0], file[1], byte_num, line_num, 497 c0, s0, c1, s1); 498 } 499 } 500 /* Fall through. */ 501 case type_status: 502 return EXIT_FAILURE; 503 504 case type_all_diffs: 505 do 506 { 507 unsigned char c0 = buf0[first_diff]; 508 unsigned char c1 = buf1[first_diff]; 509 if (c0 != c1) 510 { 511 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 512 char const *byte_num = offtostr (byte_number, byte_buf); 513 if (!opt_print_bytes) 514 { 515 /* See POSIX 1003.1-2001 for this format. */ 516 printf ("%*s %3o %3o\n", 517 offset_width, byte_num, c0, c1); 518 } 519 else 520 { 521 char s0[5]; 522 char s1[5]; 523 sprintc (s0, c0); 524 sprintc (s1, c1); 525 printf ("%*s %3o %-4s %3o %s\n", 526 offset_width, byte_num, c0, s0, c1, s1); 527 } 528 } 529 byte_number++; 530 first_diff++; 531 } 532 while (first_diff < smaller); 533 ret = EXIT_FAILURE; 534 break; 535 } 536 } 537 538 if (read0 != read1) 539 { 540 if (comparison_type != type_status) 541 { 542 /* See POSIX 1003.1-2001 for this format. */ 543 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]); 544 } 545 546 return EXIT_FAILURE; 547 } 548 } 549 while (read0 == buf_size); 550 551 return ret; 552} 553 554/* Compare two blocks of memory P0 and P1 until they differ, 555 and count the number of '\n' occurrences in the common 556 part of P0 and P1. 557 If the blocks are not guaranteed to be different, put sentinels at the ends 558 of the blocks before calling this function. 559 560 Return the offset of the first byte that differs. 561 Increment *COUNT by the count of '\n' occurrences. */ 562 563static size_t 564block_compare_and_count (word const *p0, word const *p1, off_t *count) 565{ 566 word l; /* One word from first buffer. */ 567 word const *l0, *l1; /* Pointers into each buffer. */ 568 char const *c0, *c1; /* Pointers for finding exact address. */ 569 size_t cnt = 0; /* Number of '\n' occurrences. */ 570 word nnnn; /* Newline, sizeof (word) times. */ 571 int i; 572 573 nnnn = 0; 574 for (i = 0; i < sizeof nnnn; i++) 575 nnnn = (nnnn << CHAR_BIT) | '\n'; 576 577 /* Find the rough position of the first difference by reading words, 578 not bytes. */ 579 580 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++) 581 { 582 l ^= nnnn; 583 for (i = 0; i < sizeof l; i++) 584 { 585 cnt += ! (unsigned char) l; 586 l >>= CHAR_BIT; 587 } 588 } 589 590 /* Find the exact differing position (endianness independent). */ 591 592 for (c0 = (char const *) l0, c1 = (char const *) l1; 593 *c0 == *c1; 594 c0++, c1++) 595 cnt += *c0 == '\n'; 596 597 *count += cnt; 598 return c0 - (char const *) p0; 599} 600 601/* Compare two blocks of memory P0 and P1 until they differ. 602 If the blocks are not guaranteed to be different, put sentinels at the ends 603 of the blocks before calling this function. 604 605 Return the offset of the first byte that differs. */ 606 607static size_t 608block_compare (word const *p0, word const *p1) 609{ 610 word const *l0, *l1; 611 char const *c0, *c1; 612 613 /* Find the rough position of the first difference by reading words, 614 not bytes. */ 615 616 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++) 617 continue; 618 619 /* Find the exact differing position (endianness independent). */ 620 621 for (c0 = (char const *) l0, c1 = (char const *) l1; 622 *c0 == *c1; 623 c0++, c1++) 624 continue; 625 626 return c0 - (char const *) p0; 627} 628 629/* Put into BUF the unsigned char C, making unprintable bytes 630 visible by quoting like cat -t does. */ 631 632static void 633sprintc (char *buf, unsigned char c) 634{ 635 if (! ISPRINT (c)) 636 { 637 if (c >= 128) 638 { 639 *buf++ = 'M'; 640 *buf++ = '-'; 641 c -= 128; 642 } 643 if (c < 32) 644 { 645 *buf++ = '^'; 646 c += 64; 647 } 648 else if (c == 127) 649 { 650 *buf++ = '^'; 651 c = '?'; 652 } 653 } 654 655 *buf++ = c; 656 *buf = 0; 657} 658 659/* Position file F to ignore_initial[F] bytes from its initial position, 660 and yield its new position. Don't try more than once. */ 661 662static off_t 663file_position (int f) 664{ 665 static bool positioned[2]; 666 static off_t position[2]; 667 668 if (! positioned[f]) 669 { 670 positioned[f] = 1; 671 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR); 672 } 673 return position[f]; 674} 675