1/* $NetBSD: cmp.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */ 2 3/* cmp - compare two files byte by byte 4 5 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001, 6 2002 Free Software Foundation, Inc. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 16 See the GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; see the file COPYING. 20 If not, write to the Free Software Foundation, 21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 23#include "system.h" 24 25#include <stdio.h> 26#include <cmpbuf.h> 27#include <c-stack.h> 28#include <error.h> 29#include <exitfail.h> 30#include <freesoft.h> 31#include <getopt.h> 32#include <hard-locale.h> 33#include <inttostr.h> 34#include <setmode.h> 35#include <xalloc.h> 36#include <xstrtol.h> 37 38#if defined LC_MESSAGES && ENABLE_NLS 39# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES) 40#else 41# define hard_locale_LC_MESSAGES 0 42#endif 43 44static char const authorship_msgid[] = 45 N_("Written by Torbjorn Granlund and David MacKenzie."); 46 47static char const copyright_string[] = 48 "Copyright (C) 2002 Free Software Foundation, Inc."; 49 50extern char const version_string[]; 51 52static int cmp (void); 53static off_t file_position (int); 54static size_t block_compare (word const *, word const *); 55static size_t block_compare_and_count (word const *, word const *, off_t *); 56static void sprintc (char *, unsigned char); 57 58/* Name under which this program was invoked. */ 59char *program_name; 60 61/* Filenames of the compared files. */ 62static char const *file[2]; 63 64/* File descriptors of the files. */ 65static int file_desc[2]; 66 67/* Status of the files. */ 68static struct stat stat_buf[2]; 69 70/* Read buffers for the files. */ 71static word *buffer[2]; 72 73/* Optimal block size for the files. */ 74static size_t buf_size; 75 76/* Initial prefix to ignore for each file. */ 77static off_t ignore_initial[2]; 78 79/* Number of bytes to compare. */ 80static uintmax_t bytes = UINTMAX_MAX; 81 82/* Output format. */ 83static enum comparison_type 84 { 85 type_first_diff, /* Print the first difference. */ 86 type_all_diffs, /* Print all differences. */ 87 type_status /* Exit status only. */ 88 } comparison_type; 89 90/* If nonzero, print values of bytes quoted like cat -t does. */ 91static bool opt_print_bytes; 92 93/* Values for long options that do not have single-letter equivalents. */ 94enum 95{ 96 HELP_OPTION = CHAR_MAX + 1 97}; 98 99static struct option const long_options[] = 100{ 101 {"print-bytes", 0, 0, 'b'}, 102 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */ 103 {"ignore-initial", 1, 0, 'i'}, 104 {"verbose", 0, 0, 'l'}, 105 {"bytes", 1, 0, 'n'}, 106 {"silent", 0, 0, 's'}, 107 {"quiet", 0, 0, 's'}, 108 {"version", 0, 0, 'v'}, 109 {"help", 0, 0, HELP_OPTION}, 110 {0, 0, 0, 0} 111}; 112 113static void try_help (char const *, char const *) __attribute__((noreturn)); 114static void 115try_help (char const *reason_msgid, char const *operand) 116{ 117 if (reason_msgid) 118 error (0, 0, _(reason_msgid), operand); 119 error (EXIT_TROUBLE, 0, 120 _("Try `%s --help' for more information."), program_name); 121 abort (); 122} 123 124static char const valid_suffixes[] = "kKMGTPEZY0"; 125 126/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to 127 point after the operand. If DELIMITER is nonzero, the operand may 128 be followed by DELIMITER; otherwise it must be null-terminated. */ 129static off_t 130parse_ignore_initial (char **argptr, char delimiter) 131{ 132 uintmax_t val; 133 off_t o; 134 char const *arg = *argptr; 135 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes); 136 if (! (e == LONGINT_OK 137 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter)) 138 || (o = val) < 0 || o != val || val == UINTMAX_MAX) 139 try_help ("invalid --ignore-initial value `%s'", arg); 140 return o; 141} 142 143/* Specify the output format. */ 144static void 145specify_comparison_type (enum comparison_type t) 146{ 147 if (comparison_type) 148 try_help ("options -l and -s are incompatible", 0); 149 comparison_type = t; 150} 151 152static void 153check_stdout (void) 154{ 155 if (ferror (stdout)) 156 error (EXIT_TROUBLE, 0, "%s", _("write failed")); 157 else if (fclose (stdout) != 0) 158 error (EXIT_TROUBLE, errno, "%s", _("standard output")); 159} 160 161static char const * const option_help_msgid[] = { 162 N_("-b --print-bytes Print differing bytes."), 163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."), 164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"), 165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."), 166 N_("-l --verbose Output byte numbers and values of all differing bytes."), 167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."), 168 N_("-s --quiet --silent Output nothing; yield exit status only."), 169 N_("-v --version Output version info."), 170 N_("--help Output this help."), 171 0 172}; 173 174static void 175usage (void) 176{ 177 char const * const *p; 178 179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"), 180 program_name); 181 printf ("%s\n\n", _("Compare two files byte by byte.")); 182 for (p = option_help_msgid; *p; p++) 183 printf (" %s\n", _(*p)); 184 printf ("\n%s\n%s\n\n%s\n\n%s\n", 185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."), 186 _("SKIP values may be followed by the following multiplicative suffixes:\n\ 187kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\ 188GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."), 189 _("If a FILE is `-' or missing, read standard input."), 190 _("Report bugs to <bug-gnu-utils@gnu.org>.")); 191} 192 193int 194main (int argc, char **argv) 195{ 196 int c, f, exit_status; 197 size_t words_per_buffer; 198 199 exit_failure = EXIT_TROUBLE; 200 initialize_main (&argc, &argv); 201 program_name = argv[0]; 202 setlocale (LC_ALL, ""); 203 bindtextdomain (PACKAGE, LOCALEDIR); 204 textdomain (PACKAGE); 205 c_stack_action (c_stack_die); 206 207 /* Parse command line options. */ 208 209 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0)) 210 != -1) 211 switch (c) 212 { 213 case 'b': 214 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */ 215 opt_print_bytes = 1; 216 break; 217 218 case 'i': 219 ignore_initial[0] = parse_ignore_initial (&optarg, ':'); 220 ignore_initial[1] = (*optarg++ == ':' 221 ? parse_ignore_initial (&optarg, 0) 222 : ignore_initial[0]); 223 break; 224 225 case 'l': 226 specify_comparison_type (type_all_diffs); 227 break; 228 229 case 'n': 230 { 231 uintmax_t n; 232 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK) 233 try_help ("invalid --bytes value `%s'", optarg); 234 if (n < bytes) 235 bytes = n; 236 } 237 break; 238 239 case 's': 240 specify_comparison_type (type_status); 241 break; 242 243 case 'v': 244 printf ("cmp %s\n%s\n\n%s\n\n%s\n", 245 version_string, copyright_string, 246 _(free_software_msgid), _(authorship_msgid)); 247 check_stdout (); 248 return EXIT_SUCCESS; 249 250 case HELP_OPTION: 251 usage (); 252 check_stdout (); 253 return EXIT_SUCCESS; 254 255 default: 256 try_help (0, 0); 257 } 258 259 if (optind == argc) 260 try_help ("missing operand after `%s'", argv[argc - 1]); 261 262 file[0] = argv[optind++]; 263 file[1] = optind < argc ? argv[optind++] : "-"; 264 265 for (f = 0; f < 2 && optind < argc; f++) 266 { 267 char *arg = argv[optind++]; 268 ignore_initial[f] = parse_ignore_initial (&arg, 0); 269 } 270 271 if (optind < argc) 272 try_help ("extra operand `%s'", argv[optind]); 273 274 for (f = 0; f < 2; f++) 275 { 276 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if 277 stdin is closed and opening file[0] yields file descriptor 0. */ 278 int f1 = f ^ (strcmp (file[1], "-") == 0); 279 280 /* Two files with the same name are identical. 281 But wait until we open the file once, for proper diagnostics. */ 282 if (f && file_name_cmp (file[0], file[1]) == 0) 283 return EXIT_SUCCESS; 284 285 file_desc[f1] = (strcmp (file[f1], "-") == 0 286 ? STDIN_FILENO 287 : open (file[f1], O_RDONLY, 0)); 288 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0) 289 { 290 if (file_desc[f1] < 0 && comparison_type == type_status) 291 exit (EXIT_TROUBLE); 292 else 293 error (EXIT_TROUBLE, errno, "%s", file[f1]); 294 } 295 296 set_binary_mode (file_desc[f1], 1); 297 } 298 299 /* If the files are links to the same inode and have the same file position, 300 they are identical. */ 301 302 if (0 < same_file (&stat_buf[0], &stat_buf[1]) 303 && same_file_attributes (&stat_buf[0], &stat_buf[1]) 304 && file_position (0) == file_position (1)) 305 return EXIT_SUCCESS; 306 307 /* If output is redirected to the null device, we may assume `-s'. */ 308 309 if (comparison_type != type_status) 310 { 311 struct stat outstat, nullstat; 312 313 if (fstat (STDOUT_FILENO, &outstat) == 0 314 && stat (NULL_DEVICE, &nullstat) == 0 315 && 0 < same_file (&outstat, &nullstat)) 316 comparison_type = type_status; 317 } 318 319 /* If only a return code is needed, 320 and if both input descriptors are associated with plain files, 321 conclude that the files differ if they have different sizes 322 and if more bytes will be compared than are in the smaller file. */ 323 324 if (comparison_type == type_status 325 && S_ISREG (stat_buf[0].st_mode) 326 && S_ISREG (stat_buf[1].st_mode)) 327 { 328 off_t s0 = stat_buf[0].st_size - file_position (0); 329 off_t s1 = stat_buf[1].st_size - file_position (1); 330 if (s0 < 0) 331 s0 = 0; 332 if (s1 < 0) 333 s1 = 0; 334 if (s0 != s1 && MIN (s0, s1) < bytes) 335 exit (EXIT_FAILURE); 336 } 337 338 /* Get the optimal block size of the files. */ 339 340 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]), 341 STAT_BLOCKSIZE (stat_buf[1]), 342 PTRDIFF_MAX - sizeof (word)); 343 344 /* Allocate word-aligned buffers, with space for sentinels at the end. */ 345 346 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word); 347 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer); 348 buffer[1] = buffer[0] + words_per_buffer; 349 350 exit_status = cmp (); 351 352 for (f = 0; f < 2; f++) 353 if (close (file_desc[f]) != 0) 354 error (EXIT_TROUBLE, errno, "%s", file[f]); 355 if (exit_status != 0 && comparison_type != type_status) 356 check_stdout (); 357 exit (exit_status); 358 return exit_status; 359} 360 361/* Compare the two files already open on `file_desc[0]' and `file_desc[1]', 362 using `buffer[0]' and `buffer[1]'. 363 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different, 364 >1 if error. */ 365 366static int 367cmp (void) 368{ 369 off_t line_number = 1; /* Line number (1...) of difference. */ 370 off_t byte_number = 1; /* Byte number (1...) of difference. */ 371 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */ 372 size_t read0, read1; /* Number of bytes read from each file. */ 373 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */ 374 size_t smaller; /* The lesser of `read0' and `read1'. */ 375 word *buffer0 = buffer[0]; 376 word *buffer1 = buffer[1]; 377 char *buf0 = (char *) buffer0; 378 char *buf1 = (char *) buffer1; 379 int ret = EXIT_SUCCESS; 380 int f; 381 int offset_width; 382 383 if (comparison_type == type_all_diffs) 384 { 385 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t)); 386 387 for (f = 0; f < 2; f++) 388 if (S_ISREG (stat_buf[f].st_mode)) 389 { 390 off_t file_bytes = stat_buf[f].st_size - file_position (f); 391 if (file_bytes < byte_number_max) 392 byte_number_max = file_bytes; 393 } 394 395 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++) 396 continue; 397 } 398 399 for (f = 0; f < 2; f++) 400 { 401 off_t ig = ignore_initial[f]; 402 if (ig && file_position (f) == -1) 403 { 404 /* lseek failed; read and discard the ignored initial prefix. */ 405 do 406 { 407 size_t bytes_to_read = MIN (ig, buf_size); 408 size_t r = block_read (file_desc[f], buf0, bytes_to_read); 409 if (r != bytes_to_read) 410 { 411 if (r == SIZE_MAX) 412 error (EXIT_TROUBLE, errno, "%s", file[f]); 413 break; 414 } 415 ig -= r; 416 } 417 while (ig); 418 } 419 } 420 421 do 422 { 423 size_t bytes_to_read = buf_size; 424 425 if (remaining != UINTMAX_MAX) 426 { 427 if (remaining < bytes_to_read) 428 bytes_to_read = remaining; 429 remaining -= bytes_to_read; 430 } 431 432 read0 = block_read (file_desc[0], buf0, bytes_to_read); 433 if (read0 == SIZE_MAX) 434 error (EXIT_TROUBLE, errno, "%s", file[0]); 435 read1 = block_read (file_desc[1], buf1, bytes_to_read); 436 if (read1 == SIZE_MAX) 437 error (EXIT_TROUBLE, errno, "%s", file[1]); 438 439 /* Insert sentinels for the block compare. */ 440 441 buf0[read0] = ~buf1[read0]; 442 buf1[read1] = ~buf0[read1]; 443 444 /* If the line number should be written for differing files, 445 compare the blocks and count the number of newlines 446 simultaneously. */ 447 first_diff = (comparison_type == type_first_diff 448 ? block_compare_and_count (buffer0, buffer1, &line_number) 449 : block_compare (buffer0, buffer1)); 450 451 byte_number += first_diff; 452 smaller = MIN (read0, read1); 453 454 if (first_diff < smaller) 455 { 456 switch (comparison_type) 457 { 458 case type_first_diff: 459 { 460 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 461 char line_buf[INT_BUFSIZE_BOUND (off_t)]; 462 char const *byte_num = offtostr (byte_number, byte_buf); 463 char const *line_num = offtostr (line_number, line_buf); 464 if (!opt_print_bytes) 465 { 466 /* See POSIX 1003.1-2001 for this format. This 467 message is used only in the POSIX locale, so it 468 need not be translated. */ 469 static char const char_message[] = 470 "%s %s differ: char %s, line %s\n"; 471 472 /* The POSIX rationale recommends using the word 473 "byte" outside the POSIX locale. Some gettext 474 implementations translate even in the POSIX 475 locale if certain other environment variables 476 are set, so use "byte" if a translation is 477 available, or if outside the POSIX locale. */ 478 static char const byte_msgid[] = 479 N_("%s %s differ: byte %s, line %s\n"); 480 char const *byte_message = _(byte_msgid); 481 bool use_byte_message = (byte_message != byte_msgid 482 || hard_locale_LC_MESSAGES); 483 484 printf ((use_byte_message 485 ? byte_message 486 : "%s %s differ: char %s, line %s\n"), 487 file[0], file[1], byte_num, line_num); 488 } 489 else 490 { 491 unsigned char c0 = buf0[first_diff]; 492 unsigned char c1 = buf1[first_diff]; 493 char s0[5]; 494 char s1[5]; 495 sprintc (s0, c0); 496 sprintc (s1, c1); 497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"), 498 file[0], file[1], byte_num, line_num, 499 c0, s0, c1, s1); 500 } 501 } 502 /* Fall through. */ 503 case type_status: 504 return EXIT_FAILURE; 505 506 case type_all_diffs: 507 do 508 { 509 unsigned char c0 = buf0[first_diff]; 510 unsigned char c1 = buf1[first_diff]; 511 if (c0 != c1) 512 { 513 char byte_buf[INT_BUFSIZE_BOUND (off_t)]; 514 char const *byte_num = offtostr (byte_number, byte_buf); 515 if (!opt_print_bytes) 516 { 517 /* See POSIX 1003.1-2001 for this format. */ 518 printf ("%*s %3o %3o\n", 519 offset_width, byte_num, c0, c1); 520 } 521 else 522 { 523 char s0[5]; 524 char s1[5]; 525 sprintc (s0, c0); 526 sprintc (s1, c1); 527 printf ("%*s %3o %-4s %3o %s\n", 528 offset_width, byte_num, c0, s0, c1, s1); 529 } 530 } 531 byte_number++; 532 first_diff++; 533 } 534 while (first_diff < smaller); 535 ret = EXIT_FAILURE; 536 break; 537 } 538 } 539 540 if (read0 != read1) 541 { 542 if (comparison_type != type_status) 543 { 544 /* See POSIX 1003.1-2001 for this format. */ 545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]); 546 } 547 548 return EXIT_FAILURE; 549 } 550 } 551 while (read0 == buf_size); 552 553 return ret; 554} 555 556/* Compare two blocks of memory P0 and P1 until they differ, 557 and count the number of '\n' occurrences in the common 558 part of P0 and P1. 559 If the blocks are not guaranteed to be different, put sentinels at the ends 560 of the blocks before calling this function. 561 562 Return the offset of the first byte that differs. 563 Increment *COUNT by the count of '\n' occurrences. */ 564 565static size_t 566block_compare_and_count (word const *p0, word const *p1, off_t *count) 567{ 568 word l; /* One word from first buffer. */ 569 word const *l0, *l1; /* Pointers into each buffer. */ 570 char const *c0, *c1; /* Pointers for finding exact address. */ 571 size_t cnt = 0; /* Number of '\n' occurrences. */ 572 word nnnn; /* Newline, sizeof (word) times. */ 573 int i; 574 575 nnnn = 0; 576 for (i = 0; i < sizeof nnnn; i++) 577 nnnn = (nnnn << CHAR_BIT) | '\n'; 578 579 /* Find the rough position of the first difference by reading words, 580 not bytes. */ 581 582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++) 583 { 584 l ^= nnnn; 585 for (i = 0; i < sizeof l; i++) 586 { 587 cnt += ! (unsigned char) l; 588 l >>= CHAR_BIT; 589 } 590 } 591 592 /* Find the exact differing position (endianness independent). */ 593 594 for (c0 = (char const *) l0, c1 = (char const *) l1; 595 *c0 == *c1; 596 c0++, c1++) 597 cnt += *c0 == '\n'; 598 599 *count += cnt; 600 return c0 - (char const *) p0; 601} 602 603/* Compare two blocks of memory P0 and P1 until they differ. 604 If the blocks are not guaranteed to be different, put sentinels at the ends 605 of the blocks before calling this function. 606 607 Return the offset of the first byte that differs. */ 608 609static size_t 610block_compare (word const *p0, word const *p1) 611{ 612 word const *l0, *l1; 613 char const *c0, *c1; 614 615 /* Find the rough position of the first difference by reading words, 616 not bytes. */ 617 618 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++) 619 continue; 620 621 /* Find the exact differing position (endianness independent). */ 622 623 for (c0 = (char const *) l0, c1 = (char const *) l1; 624 *c0 == *c1; 625 c0++, c1++) 626 continue; 627 628 return c0 - (char const *) p0; 629} 630 631/* Put into BUF the unsigned char C, making unprintable bytes 632 visible by quoting like cat -t does. */ 633 634static void 635sprintc (char *buf, unsigned char c) 636{ 637 if (! ISPRINT (c)) 638 { 639 if (c >= 128) 640 { 641 *buf++ = 'M'; 642 *buf++ = '-'; 643 c -= 128; 644 } 645 if (c < 32) 646 { 647 *buf++ = '^'; 648 c += 64; 649 } 650 else if (c == 127) 651 { 652 *buf++ = '^'; 653 c = '?'; 654 } 655 } 656 657 *buf++ = c; 658 *buf = 0; 659} 660 661/* Position file F to ignore_initial[F] bytes from its initial position, 662 and yield its new position. Don't try more than once. */ 663 664static off_t 665file_position (int f) 666{ 667 static bool positioned[2]; 668 static off_t position[2]; 669 670 if (! positioned[f]) 671 { 672 positioned[f] = 1; 673 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR); 674 } 675 return position[f]; 676} 677