/* $NetBSD: diff.c,v 1.2 2006/01/14 09:18:17 apb Exp $ */

/* diff - compare files line by line

   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
   Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU DIFF; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */

#define GDIFF_MAIN
#include "diff.h"
#include <c-stack.h>
#include <dirname.h>
#include <error.h>
#include <exclude.h>
#include <exitfail.h>
#include <fnmatch.h>
#include <freesoft.h>
#include <getopt.h>
#include <hard-locale.h>
#include <prepargs.h>
#include <quotesys.h>
#include <regex.h>
#include <setmode.h>
#include <xalloc.h>
#include <posixver.h>

/* Untranslated message ids printed by the -v (version) option. */
static char const authorship_msgid[] =
  N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
Richard Stallman, and Len Tower.");

static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";

/* Smallest gutter between the two side-by-side columns; main uses it
   when it derives sdiff_half_width and sdiff_column2_offset. */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

/* The regexps accumulated from repeated options (main feeds it from
   -F/-p and from -I).  add_regexp appends each pattern to `regexps',
   separating patterns with `\|'; summarize_regexp_list later compiles
   the combined text when more than one pattern was given. */
struct regexp_list
{
  char *regexps; /* chars representing disjunction of the regexps */
  size_t len; /* chars used in `regexps' */
  size_t size; /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction? */
  /* Pattern buffer that add_regexp and summarize_regexp_list compile
     into; main points it at function_regexp or ignore_regexp. */
  struct re_pattern_buffer *buf;
};

/* Forward declarations of the file-local functions defined below. */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively. */
static bool recursive;

/* In context diffs, show previous lines that match these regexps. */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps. */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect. */
static bool binary;
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents. */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents. */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens. */
static bool report_identical_files;


/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.
108 109 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 110 the length of that vector. */ 111 112static char * 113option_list (char **optionvec, int count) 114{ 115 int i; 116 size_t size = 1; 117 char *result; 118 char *p; 119 120 for (i = 0; i < count; i++) 121 size += 1 + quote_system_arg ((char *) 0, optionvec[i]); 122 123 p = result = xmalloc (size); 124 125 for (i = 0; i < count; i++) 126 { 127 *p++ = ' '; 128 p += quote_system_arg (p, optionvec[i]); 129 } 130 131 *p = 0; 132 return result; 133} 134 135 136/* Return an option value suitable for add_exclude. */ 137 138static int 139exclude_options (void) 140{ 141 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0); 142} 143 144static char const shortopts[] = 145"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y"; 146 147/* Values for long options that do not have single-letter equivalents. */ 148enum 149{ 150 BINARY_OPTION = CHAR_MAX + 1, 151 FROM_FILE_OPTION, 152 HELP_OPTION, 153 HORIZON_LINES_OPTION, 154 IGNORE_FILE_NAME_CASE_OPTION, 155 INHIBIT_HUNK_MERGE_OPTION, 156 LEFT_COLUMN_OPTION, 157 LINE_FORMAT_OPTION, 158 NO_IGNORE_FILE_NAME_CASE_OPTION, 159 NORMAL_OPTION, 160 SDIFF_MERGE_ASSIST_OPTION, 161 STRIP_TRAILING_CR_OPTION, 162 SUPPRESS_COMMON_LINES_OPTION, 163 TO_FILE_OPTION, 164 165 /* These options must be in sequence. */ 166 UNCHANGED_LINE_FORMAT_OPTION, 167 OLD_LINE_FORMAT_OPTION, 168 NEW_LINE_FORMAT_OPTION, 169 170 /* These options must be in sequence. 
*/ 171 UNCHANGED_GROUP_FORMAT_OPTION, 172 OLD_GROUP_FORMAT_OPTION, 173 NEW_GROUP_FORMAT_OPTION, 174 CHANGED_GROUP_FORMAT_OPTION 175}; 176 177static char const group_format_option[][sizeof "--unchanged-group-format"] = 178 { 179 "--unchanged-group-format", 180 "--old-group-format", 181 "--new-group-format", 182 "--changed-group-format" 183 }; 184 185static char const line_format_option[][sizeof "--unchanged-line-format"] = 186 { 187 "--unchanged-line-format", 188 "--old-line-format", 189 "--new-line-format" 190 }; 191 192static struct option const longopts[] = 193{ 194 {"binary", 0, 0, BINARY_OPTION}, 195 {"brief", 0, 0, 'q'}, 196 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION}, 197 {"context", 2, 0, 'C'}, 198 {"ed", 0, 0, 'e'}, 199 {"exclude", 1, 0, 'x'}, 200 {"exclude-from", 1, 0, 'X'}, 201 {"expand-tabs", 0, 0, 't'}, 202 {"forward-ed", 0, 0, 'f'}, 203 {"from-file", 1, 0, FROM_FILE_OPTION}, 204 {"help", 0, 0, HELP_OPTION}, 205 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION}, 206 {"ifdef", 1, 0, 'D'}, 207 {"ignore-all-space", 0, 0, 'w'}, 208 {"ignore-blank-lines", 0, 0, 'B'}, 209 {"ignore-case", 0, 0, 'i'}, 210 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, 211 {"ignore-matching-lines", 1, 0, 'I'}, 212 {"ignore-space-change", 0, 0, 'b'}, 213 {"ignore-tab-expansion", 0, 0, 'E'}, 214 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, 215 {"initial-tab", 0, 0, 'T'}, 216 {"label", 1, 0, 'L'}, 217 {"left-column", 0, 0, LEFT_COLUMN_OPTION}, 218 {"line-format", 1, 0, LINE_FORMAT_OPTION}, 219 {"minimal", 0, 0, 'd'}, 220 {"new-file", 0, 0, 'N'}, 221 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, 222 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, 223 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, 224 {"normal", 0, 0, NORMAL_OPTION}, 225 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, 226 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION}, 227 {"paginate", 0, 0, 'l'}, 228 {"rcs", 0, 0, 'n'}, 229 {"recursive", 
0, 0, 'r'}, 230 {"report-identical-files", 0, 0, 's'}, 231 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, 232 {"show-c-function", 0, 0, 'p'}, 233 {"show-function-line", 1, 0, 'F'}, 234 {"side-by-side", 0, 0, 'y'}, 235 {"speed-large-files", 0, 0, 'H'}, 236 {"starting-file", 1, 0, 'S'}, 237 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, 238 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, 239 {"text", 0, 0, 'a'}, 240 {"to-file", 1, 0, TO_FILE_OPTION}, 241 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, 242 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, 243 {"unidirectional-new-file", 0, 0, 'P'}, 244 {"unified", 2, 0, 'U'}, 245 {"version", 0, 0, 'v'}, 246 {"width", 1, 0, 'W'}, 247 {0, 0, 0, 0} 248}; 249 250int 251main (int argc, char **argv) 252{ 253 int exit_status = EXIT_SUCCESS; 254 int c; 255 int i; 256 int prev = -1; 257 lin ocontext = -1; 258 bool explicit_context = 0; 259 int width = 0; 260 bool show_c_function = 0; 261 char const *from_file = 0; 262 char const *to_file = 0; 263 uintmax_t numval; 264 char *numend; 265 266 /* Do our initializations. */ 267 exit_failure = 2; 268 initialize_main (&argc, &argv); 269 program_name = argv[0]; 270 setlocale (LC_ALL, ""); 271 bindtextdomain (PACKAGE, LOCALEDIR); 272 textdomain (PACKAGE); 273 c_stack_action (c_stack_die); 274 function_regexp_list.buf = &function_regexp; 275 ignore_regexp_list.buf = &ignore_regexp; 276 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); 277 excluded = new_exclude (); 278 279 /* Decode the options. */ 280 281 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) 282 { 283 switch (c) 284 { 285 case 0: 286 break; 287 288 case '0': 289 case '1': 290 case '2': 291 case '3': 292 case '4': 293 case '5': 294 case '6': 295 case '7': 296 case '8': 297 case '9': 298 if (! 
ISDIGIT (prev)) 299 ocontext = c - '0'; 300 else if (LIN_MAX / 10 < ocontext 301 || ((ocontext = 10 * ocontext + c - '0') < 0)) 302 ocontext = LIN_MAX; 303 break; 304 305 case 'a': 306 text = 1; 307 break; 308 309 case 'b': 310 if (ignore_white_space < IGNORE_SPACE_CHANGE) 311 ignore_white_space = IGNORE_SPACE_CHANGE; 312 break; 313 314 case 'B': 315 ignore_blank_lines = 1; 316 break; 317 318 case 'C': /* +context[=lines] */ 319 case 'U': /* +unified[=lines] */ 320 { 321 if (optarg) 322 { 323 numval = strtoumax (optarg, &numend, 10); 324 if (*numend) 325 try_help ("invalid context length `%s'", optarg); 326 if (LIN_MAX < numval) 327 numval = LIN_MAX; 328 } 329 else 330 numval = 3; 331 332 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); 333 if (context < numval) 334 context = numval; 335 explicit_context = 1; 336 } 337 break; 338 339 case 'c': 340 specify_style (OUTPUT_CONTEXT); 341 if (context < 3) 342 context = 3; 343 break; 344 345 case 'd': 346 minimal = 1; 347 break; 348 349 case 'D': 350 specify_style (OUTPUT_IFDEF); 351 { 352 static char const C_ifdef_group_formats[] = 353 "%%=%c#ifndef %s\n%%<#endif /* ! 
%s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 354 char *b = xmalloc (sizeof C_ifdef_group_formats 355 + 7 * strlen (optarg) - 14 /* 7*"%s" */ 356 - 8 /* 5*"%%" + 3*"%c" */); 357 sprintf (b, C_ifdef_group_formats, 358 0, 359 optarg, optarg, 0, 360 optarg, optarg, 0, 361 optarg, optarg, optarg); 362 for (i = 0; i < sizeof group_format / sizeof *group_format; i++) 363 { 364 specify_value (&group_format[i], b, "-D"); 365 b += strlen (b) + 1; 366 } 367 } 368 break; 369 370 case 'e': 371 specify_style (OUTPUT_ED); 372 break; 373 374 case 'E': 375 if (ignore_white_space < IGNORE_TAB_EXPANSION) 376 ignore_white_space = IGNORE_TAB_EXPANSION; 377 break; 378 379 case 'f': 380 specify_style (OUTPUT_FORWARD_ED); 381 break; 382 383 case 'F': 384 add_regexp (&function_regexp_list, optarg); 385 break; 386 387 case 'h': 388 /* Split the files into chunks for faster processing. 389 Usually does not change the result. 390 391 This currently has no effect. */ 392 break; 393 394 case 'H': 395 speed_large_files = 1; 396 break; 397 398 case 'i': 399 ignore_case = 1; 400 break; 401 402 case 'I': 403 add_regexp (&ignore_regexp_list, optarg); 404 break; 405 406 case 'l': 407 if (!pr_program[0]) 408 try_help ("pagination not supported on this host", 0); 409 paginate = 1; 410#ifdef SIGCHLD 411 /* Pagination requires forking and waiting, and 412 System V fork+wait does not work if SIGCHLD is ignored. 
*/ 413 signal (SIGCHLD, SIG_DFL); 414#endif 415 break; 416 417 case 'L': 418 if (!file_label[0]) 419 file_label[0] = optarg; 420 else if (!file_label[1]) 421 file_label[1] = optarg; 422 else 423 fatal ("too many file label options"); 424 break; 425 426 case 'n': 427 specify_style (OUTPUT_RCS); 428 break; 429 430 case 'N': 431 new_file = 1; 432 break; 433 434 case 'p': 435 show_c_function = 1; 436 add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); 437 break; 438 439 case 'P': 440 unidirectional_new_file = 1; 441 break; 442 443 case 'q': 444 brief = 1; 445 break; 446 447 case 'r': 448 recursive = 1; 449 break; 450 451 case 's': 452 report_identical_files = 1; 453 break; 454 455 case 'S': 456 specify_value (&starting_file, optarg, "-S"); 457 break; 458 459 case 't': 460 expand_tabs = 1; 461 break; 462 463 case 'T': 464 initial_tab = 1; 465 break; 466 467 case 'u': 468 specify_style (OUTPUT_UNIFIED); 469 if (context < 3) 470 context = 3; 471 break; 472 473 case 'v': 474 printf ("diff %s\n%s\n\n%s\n\n%s\n", 475 version_string, copyright_string, 476 _(free_software_msgid), _(authorship_msgid)); 477 check_stdout (); 478 return EXIT_SUCCESS; 479 480 case 'w': 481 ignore_white_space = IGNORE_ALL_SPACE; 482 break; 483 484 case 'x': 485 add_exclude (excluded, optarg, exclude_options ()); 486 break; 487 488 case 'X': 489 if (add_exclude_file (add_exclude, excluded, optarg, 490 exclude_options (), '\n')) 491 pfatal_with_name (optarg); 492 break; 493 494 case 'y': 495 specify_style (OUTPUT_SDIFF); 496 break; 497 498 case 'W': 499 numval = strtoumax (optarg, &numend, 10); 500 if (! 
(0 < numval && numval <= INT_MAX) || *numend) 501 try_help ("invalid width `%s'", optarg); 502 if (width != numval) 503 { 504 if (width) 505 fatal ("conflicting width options"); 506 width = numval; 507 } 508 break; 509 510 case BINARY_OPTION: 511#if HAVE_SETMODE_DOS 512 binary = 1; 513 set_binary_mode (STDOUT_FILENO, 1); 514#endif 515 break; 516 517 case FROM_FILE_OPTION: 518 specify_value (&from_file, optarg, "--from-file"); 519 break; 520 521 case HELP_OPTION: 522 usage (); 523 check_stdout (); 524 return EXIT_SUCCESS; 525 526 case HORIZON_LINES_OPTION: 527 numval = strtoumax (optarg, &numend, 10); 528 if (*numend) 529 try_help ("invalid horizon length `%s'", optarg); 530 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); 531 break; 532 533 case IGNORE_FILE_NAME_CASE_OPTION: 534 ignore_file_name_case = 1; 535 break; 536 537 case INHIBIT_HUNK_MERGE_OPTION: 538 /* This option is obsolete, but accept it for backward 539 compatibility. */ 540 break; 541 542 case LEFT_COLUMN_OPTION: 543 left_column = 1; 544 break; 545 546 case LINE_FORMAT_OPTION: 547 specify_style (OUTPUT_IFDEF); 548 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 549 specify_value (&line_format[i], optarg, "--line-format"); 550 break; 551 552 case NO_IGNORE_FILE_NAME_CASE_OPTION: 553 ignore_file_name_case = 0; 554 break; 555 556 case NORMAL_OPTION: 557 specify_style (OUTPUT_NORMAL); 558 break; 559 560 case SDIFF_MERGE_ASSIST_OPTION: 561 specify_style (OUTPUT_SDIFF); 562 sdiff_merge_assist = 1; 563 break; 564 565 case STRIP_TRAILING_CR_OPTION: 566 strip_trailing_cr = 1; 567 break; 568 569 case SUPPRESS_COMMON_LINES_OPTION: 570 suppress_common_lines = 1; 571 break; 572 573 case TO_FILE_OPTION: 574 specify_value (&to_file, optarg, "--to-file"); 575 break; 576 577 case UNCHANGED_LINE_FORMAT_OPTION: 578 case OLD_LINE_FORMAT_OPTION: 579 case NEW_LINE_FORMAT_OPTION: 580 specify_style (OUTPUT_IFDEF); 581 c -= UNCHANGED_LINE_FORMAT_OPTION; 582 specify_value (&line_format[c], optarg, 
line_format_option[c]); 583 break; 584 585 case UNCHANGED_GROUP_FORMAT_OPTION: 586 case OLD_GROUP_FORMAT_OPTION: 587 case NEW_GROUP_FORMAT_OPTION: 588 case CHANGED_GROUP_FORMAT_OPTION: 589 specify_style (OUTPUT_IFDEF); 590 c -= UNCHANGED_GROUP_FORMAT_OPTION; 591 specify_value (&group_format[c], optarg, group_format_option[c]); 592 break; 593 594 default: 595 try_help (0, 0); 596 } 597 prev = c; 598 } 599 600 if (output_style == OUTPUT_UNSPECIFIED) 601 { 602 if (show_c_function) 603 { 604 specify_style (OUTPUT_CONTEXT); 605 if (ocontext < 0) 606 context = 3; 607 } 608 else 609 specify_style (OUTPUT_NORMAL); 610 } 611 612 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) 613 time_format = "%Y-%m-%d %H:%M:%S.%N %z"; 614 else 615 { 616 /* See POSIX 1003.1-2001 for this format. */ 617 time_format = "%a %b %e %T %Y"; 618 } 619 620 if (0 <= ocontext) 621 { 622 bool modern_usage = 200112 <= posix2_version (); 623 624 if ((output_style == OUTPUT_CONTEXT 625 || output_style == OUTPUT_UNIFIED) 626 && (context < ocontext 627 || (ocontext < context && ! explicit_context))) 628 { 629 if (modern_usage) 630 { 631 error (0, 0, 632 _("`-%ld' option is obsolete; use `-%c %ld'"), 633 (long) ocontext, 634 output_style == OUTPUT_CONTEXT ? 'C' : 'U', 635 (long) ocontext); 636 try_help (0, 0); 637 } 638 context = ocontext; 639 } 640 else 641 { 642 if (modern_usage) 643 { 644 error (0, 0, _("`-%ld' option is obsolete; omit it"), 645 (long) ocontext); 646 try_help (0, 0); 647 } 648 } 649 } 650 651 { 652 /* 653 * We maximize first the half line width, and then the gutter width, 654 * according to the following constraints: 655 * 1. Two half lines plus a gutter must fit in a line. 656 * 2. If the half line width is nonzero: 657 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 658 * b. If tabs are not expanded to spaces, 659 * a half line plus a gutter is an integral number of tabs, 660 * so that tabs in the right column line up. 661 */ 662 unsigned int t = expand_tabs ? 
1 : TAB_WIDTH; 663 int w = width ? width : 130; 664 int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t; 665 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)), 666 sdiff_column2_offset = sdiff_half_width ? off : w; 667 } 668 669 /* Make the horizon at least as large as the context, so that 670 shift_boundaries has more freedom to shift the first and last hunks. */ 671 if (horizon_lines < context) 672 horizon_lines = context; 673 674 summarize_regexp_list (&function_regexp_list); 675 summarize_regexp_list (&ignore_regexp_list); 676 677 if (output_style == OUTPUT_IFDEF) 678 { 679 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 680 if (!line_format[i]) 681 line_format[i] = "%l\n"; 682 if (!group_format[OLD]) 683 group_format[OLD] 684 = group_format[CHANGED] ? group_format[CHANGED] : "%<"; 685 if (!group_format[NEW]) 686 group_format[NEW] 687 = group_format[CHANGED] ? group_format[CHANGED] : "%>"; 688 if (!group_format[UNCHANGED]) 689 group_format[UNCHANGED] = "%="; 690 if (!group_format[CHANGED]) 691 group_format[CHANGED] = concat (group_format[OLD], 692 group_format[NEW], ""); 693 } 694 695 no_diff_means_no_output = 696 (output_style == OUTPUT_IFDEF ? 
697 (!*group_format[UNCHANGED] 698 || (strcmp (group_format[UNCHANGED], "%=") == 0 699 && !*line_format[UNCHANGED])) 700 : (output_style != OUTPUT_SDIFF) | suppress_common_lines); 701 702 files_can_be_treated_as_binary = 703 (brief 704 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr 705 | (ignore_regexp_list.regexps || ignore_white_space))); 706 707 switch_string = option_list (argv + 1, optind - 1); 708 709 if (from_file) 710 { 711 if (to_file) 712 fatal ("--from-file and --to-file both specified"); 713 else 714 for (; optind < argc; optind++) 715 { 716 int status = compare_files ((struct comparison *) 0, 717 from_file, argv[optind]); 718 if (exit_status < status) 719 exit_status = status; 720 } 721 } 722 else 723 { 724 if (to_file) 725 for (; optind < argc; optind++) 726 { 727 int status = compare_files ((struct comparison *) 0, 728 argv[optind], to_file); 729 if (exit_status < status) 730 exit_status = status; 731 } 732 else 733 { 734 if (argc - optind != 2) 735 { 736 if (argc - optind < 2) 737 try_help ("missing operand after `%s'", argv[argc - 1]); 738 else 739 try_help ("extra operand `%s'", argv[optind + 2]); 740 } 741 742 exit_status = compare_files ((struct comparison *) 0, 743 argv[optind], argv[optind + 1]); 744 } 745 } 746 747 /* Print any messages that were saved up for last. */ 748 print_message_queue (); 749 750 check_stdout (); 751 exit (exit_status); 752 return exit_status; 753} 754 755/* Append to REGLIST the regexp PATTERN. 
*/ 756 757static void 758add_regexp (struct regexp_list *reglist, char const *pattern) 759{ 760 size_t patlen = strlen (pattern); 761 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 762 763 if (m != 0) 764 error (0, 0, "%s: %s", pattern, m); 765 else 766 { 767 char *regexps = reglist->regexps; 768 size_t len = reglist->len; 769 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 770 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 771 size_t size = reglist->size; 772 773 if (size <= newlen) 774 { 775 if (!size) 776 size = 1; 777 778 do size *= 2; 779 while (size <= newlen); 780 781 reglist->size = size; 782 reglist->regexps = regexps = xrealloc (regexps, size); 783 } 784 if (multiple_regexps) 785 { 786 regexps[len++] = '\\'; 787 regexps[len++] = '|'; 788 } 789 memcpy (regexps + len, pattern, patlen + 1); 790 } 791} 792 793/* Ensure that REGLIST represents the disjunction of its regexps. 794 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 795 796static void 797summarize_regexp_list (struct regexp_list *reglist) 798{ 799 if (reglist->regexps) 800 { 801 /* At least one regexp was specified. Allocate a fastmap for it. */ 802 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 803 if (reglist->multiple_regexps) 804 { 805 /* Compile the disjunction of the regexps. 806 (If just one regexp was specified, it is already compiled.) 
*/ 807 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 808 reglist->buf); 809 if (m != 0) 810 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 811 } 812 } 813} 814 815static void 816try_help (char const *reason_msgid, char const *operand) 817{ 818 if (reason_msgid) 819 error (0, 0, _(reason_msgid), operand); 820 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 821 program_name); 822 abort (); 823} 824 825static void 826check_stdout (void) 827{ 828 if (ferror (stdout)) 829 fatal ("write failed"); 830 else if (fclose (stdout) != 0) 831 pfatal_with_name (_("standard output")); 832} 833 834static char const * const option_help_msgid[] = { 835 N_("Compare files line by line."), 836 "", 837 N_("-i --ignore-case Ignore case differences in file contents."), 838 N_("--ignore-file-name-case Ignore case when comparing file names."), 839 N_("--no-ignore-file-name-case Consider case when comparing file names."), 840 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 841 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 842 N_("-w --ignore-all-space Ignore all white space."), 843 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 844 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 845 N_("--strip-trailing-cr Strip trailing carriage return on input."), 846#if HAVE_SETMODE_DOS 847 N_("--binary Read and write data in binary mode."), 848#endif 849 N_("-a --text Treat all files as text."), 850 "", 851 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 852-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 853 --label LABEL Use LABEL instead of file name.\n\ 854 -p --show-c-function Show which C function each change is in.\n\ 855 -F RE --show-function-line=RE Show the most recent line matching RE."), 856 N_("-q --brief Output only whether files differ."), 857 N_("-e --ed Output an ed 
script."), 858 N_("--normal Output a normal diff."), 859 N_("-n --rcs Output an RCS format diff."), 860 N_("-y --side-by-side Output in two columns.\n\ 861 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 862 --left-column Output only the left column of common lines.\n\ 863 --suppress-common-lines Do not output common lines."), 864 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 865 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 866 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 867 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 868 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 869 N_(" GFMT may contain:\n\ 870 %< lines from FILE1\n\ 871 %> lines from FILE2\n\ 872 %= lines common to FILE1 and FILE2\n\ 873 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 874 LETTERs are as follows for new group, lower case for old group:\n\ 875 F first line number\n\ 876 L last line number\n\ 877 N number of lines = L-F+1\n\ 878 E F-1\n\ 879 M L+1"), 880 N_(" LFMT may contain:\n\ 881 %L contents of line\n\ 882 %l contents of line, excluding any trailing newline\n\ 883 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 884 N_(" Either GFMT or LFMT may contain:\n\ 885 %% %\n\ 886 %c'C' the single character C\n\ 887 %c'\\OOO' the character with octal code OOO"), 888 "", 889 N_("-l --paginate Pass the output through `pr' to paginate it."), 890 N_("-t --expand-tabs Expand tabs to spaces in output."), 891 N_("-T --initial-tab Make tabs line up by prepending a tab."), 892 "", 893 N_("-r --recursive Recursively compare any subdirectories found."), 894 N_("-N --new-file Treat absent files as empty."), 895 N_("--unidirectional-new-file Treat absent first files as empty."), 896 N_("-s --report-identical-files Report when two files are the same."), 897 N_("-x PAT --exclude=PAT Exclude 
files that match PAT."), 898 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 899 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 900 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 901 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 902 "", 903 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 904 N_("-d --minimal Try hard to find a smaller set of changes."), 905 N_("--speed-large-files Assume large files and many scattered small changes."), 906 "", 907 N_("-v --version Output version info."), 908 N_("--help Output this help."), 909 "", 910 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 911 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 912 N_("If a FILE is `-', read standard input."), 913 "", 914 N_("Report bugs to <bug-gnu-utils@gnu.org>."), 915 0 916}; 917 918static void 919usage (void) 920{ 921 char const * const *p; 922 923 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 924 925 for (p = option_help_msgid; *p; p++) 926 { 927 if (!**p) 928 putchar ('\n'); 929 else 930 { 931 char const *msg = _(*p); 932 char const *nl; 933 while ((nl = strchr (msg, '\n'))) 934 { 935 int msglen = nl + 1 - msg; 936 printf (" %.*s", msglen, msg); 937 msg = nl + 1; 938 } 939 940 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 941 } 942 } 943} 944 945/* Set VAR to VALUE, reporting an OPTION error if this is a 946 conflict. */ 947static void 948specify_value (char const **var, char const *value, char const *option) 949{ 950 if (*var && strcmp (*var, value) != 0) 951 { 952 error (0, 0, _("conflicting %s option value `%s'"), option, value); 953 try_help (0, 0); 954 } 955 *var = value; 956} 957 958/* Set the output style to STYLE, diagnosing conflicts. 
*/ 959static void 960specify_style (enum output_style style) 961{ 962 if (output_style != style) 963 { 964 if (output_style != OUTPUT_UNSPECIFIED) 965 try_help ("conflicting output style options", 0); 966 output_style = style; 967 } 968} 969 970static char const * 971filetype (struct stat const *st) 972{ 973 /* See POSIX 1003.1-2001 for these formats. 974 975 To keep diagnostics grammatical in English, the returned string 976 must start with a consonant. */ 977 978 if (S_ISREG (st->st_mode)) 979 return st->st_size == 0 ? _("regular empty file") : _("regular file"); 980 981 if (S_ISDIR (st->st_mode)) return _("directory"); 982 983#ifdef S_ISBLK 984 if (S_ISBLK (st->st_mode)) return _("block special file"); 985#endif 986#ifdef S_ISCHR 987 if (S_ISCHR (st->st_mode)) return _("character special file"); 988#endif 989#ifdef S_ISFIFO 990 if (S_ISFIFO (st->st_mode)) return _("fifo"); 991#endif 992 /* S_ISLNK is impossible with `fstat' and `stat'. */ 993#ifdef S_ISSOCK 994 if (S_ISSOCK (st->st_mode)) return _("socket"); 995#endif 996#ifdef S_TYPEISMQ 997 if (S_TYPEISMQ (st)) return _("message queue"); 998#endif 999#ifdef S_TYPEISSEM 1000 if (S_TYPEISSEM (st)) return _("semaphore"); 1001#endif 1002#ifdef S_TYPEISSHM 1003 if (S_TYPEISSHM (st)) return _("shared memory object"); 1004#endif 1005#ifdef S_TYPEISTMO 1006 if (S_TYPEISTMO (st)) return _("typed memory object"); 1007#endif 1008 1009 return _("weird file"); 1010} 1011 1012/* Set the last-modified time of *ST to be the current time. 
*/ 1013 1014static void 1015set_mtime_to_now (struct stat *st) 1016{ 1017#ifdef ST_MTIM_NSEC 1018 1019# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME 1020 if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0) 1021 return; 1022# endif 1023 1024# if HAVE_GETTIMEOFDAY 1025 { 1026 struct timeval timeval; 1027 if (gettimeofday (&timeval, NULL) == 0) 1028 { 1029 st->st_mtime = timeval.tv_sec; 1030 st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000; 1031 return; 1032 } 1033 } 1034# endif 1035 1036#endif /* ST_MTIM_NSEC */ 1037 1038 time (&st->st_mtime); 1039} 1040 1041/* Compare two files (or dirs) with parent comparison PARENT 1042 and names NAME0 and NAME1. 1043 (If PARENT is 0, then the first name is just NAME0, etc.) 1044 This is self-contained; it opens the files and closes them. 1045 1046 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1047 different, EXIT_TROUBLE if there is a problem opening them. */ 1048 1049static int 1050compare_files (struct comparison const *parent, 1051 char const *name0, 1052 char const *name1) 1053{ 1054 struct comparison cmp; 1055#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) 1056 register int f; 1057 int status = EXIT_SUCCESS; 1058 bool same_files; 1059 char *free0, *free1; 1060 1061 /* If this is directory comparison, perhaps we have a file 1062 that exists only in one of the directories. 1063 If so, just print a message to that effect. */ 1064 1065 if (! ((name0 && name1) 1066 || (unidirectional_new_file && name1) 1067 || new_file)) 1068 { 1069 char const *name = name0 == 0 ? name1 : name0; 1070 char const *dir = parent->file[name0 == 0].name; 1071 1072 /* See POSIX 1003.1-2001 for this format. */ 1073 message ("Only in %s: %s\n", dir, name); 1074 1075 /* Return EXIT_FAILURE so that diff_dirs will return 1076 EXIT_FAILURE ("some files differ"). 
*/ 1077 return EXIT_FAILURE; 1078 } 1079 1080 memset (cmp.file, 0, sizeof cmp.file); 1081 cmp.parent = parent; 1082 1083 /* cmp.file[f].desc markers */ 1084#define NONEXISTENT (-1) /* nonexistent file */ 1085#define UNOPENED (-2) /* unopened file (e.g. directory) */ 1086#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ 1087 1088#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ 1089 1090 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED; 1091 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED; 1092 1093 /* Now record the full name of each file, including nonexistent ones. */ 1094 1095 if (name0 == 0) 1096 name0 = name1; 1097 if (name1 == 0) 1098 name1 = name0; 1099 1100 if (!parent) 1101 { 1102 free0 = 0; 1103 free1 = 0; 1104 cmp.file[0].name = name0; 1105 cmp.file[1].name = name1; 1106 } 1107 else 1108 { 1109 cmp.file[0].name = free0 1110 = dir_file_pathname (parent->file[0].name, name0); 1111 cmp.file[1].name = free1 1112 = dir_file_pathname (parent->file[1].name, name1); 1113 } 1114 1115 /* Stat the files. */ 1116 1117 for (f = 0; f < 2; f++) 1118 { 1119 if (cmp.file[f].desc != NONEXISTENT) 1120 { 1121 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) 1122 { 1123 cmp.file[f].desc = cmp.file[0].desc; 1124 cmp.file[f].stat = cmp.file[0].stat; 1125 } 1126 else if (strcmp (cmp.file[f].name, "-") == 0) 1127 { 1128 cmp.file[f].desc = STDIN_FILENO; 1129 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) 1130 cmp.file[f].desc = ERRNO_ENCODE (errno); 1131 else 1132 { 1133 if (S_ISREG (cmp.file[f].stat.st_mode)) 1134 { 1135 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR); 1136 if (pos < 0) 1137 cmp.file[f].desc = ERRNO_ENCODE (errno); 1138 else 1139 cmp.file[f].stat.st_size = 1140 MAX (0, cmp.file[f].stat.st_size - pos); 1141 } 1142 1143 /* POSIX 1003.1-2001 requires current time for 1144 stdin. 
*/ 1145 set_mtime_to_now (&cmp.file[f].stat); 1146 } 1147 } 1148 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) 1149 cmp.file[f].desc = ERRNO_ENCODE (errno); 1150 } 1151 } 1152 1153 /* Mark files as nonexistent at the top level as needed for -N and 1154 --unidirectional-new-file. */ 1155 if (! parent) 1156 { 1157 if ((new_file | unidirectional_new_file) 1158 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT) 1159 && cmp.file[1].desc == UNOPENED) 1160 cmp.file[0].desc = NONEXISTENT; 1161 1162 if (new_file 1163 && cmp.file[0].desc == UNOPENED 1164 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT)) 1165 cmp.file[1].desc = NONEXISTENT; 1166 } 1167 1168 for (f = 0; f < 2; f++) 1169 if (cmp.file[f].desc == NONEXISTENT) 1170 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; 1171 1172 for (f = 0; f < 2; f++) 1173 { 1174 int e = ERRNO_DECODE (cmp.file[f].desc); 1175 if (0 <= e) 1176 { 1177 errno = e; 1178 perror_with_name (cmp.file[f].name); 1179 status = EXIT_TROUBLE; 1180 } 1181 } 1182 1183 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) 1184 { 1185 /* If one is a directory, and it was specified in the command line, 1186 use the file in that dir with the other file's basename. */ 1187 1188 int fnm_arg = DIR_P (0); 1189 int dir_arg = 1 - fnm_arg; 1190 char const *fnm = cmp.file[fnm_arg].name; 1191 char const *dir = cmp.file[dir_arg].name; 1192 char const *filename = cmp.file[dir_arg].name = free0 1193 = dir_file_pathname (dir, base_name (fnm)); 1194 1195 if (strcmp (fnm, "-") == 0) 1196 fatal ("cannot compare `-' to a directory"); 1197 1198 if (stat (filename, &cmp.file[dir_arg].stat) != 0) 1199 { 1200 perror_with_name (filename); 1201 status = EXIT_TROUBLE; 1202 } 1203 } 1204 1205 if (status != EXIT_SUCCESS) 1206 { 1207 /* One of the files should exist but does not. 
*/ 1208 } 1209 else if ((same_files 1210 = (cmp.file[0].desc != NONEXISTENT 1211 && cmp.file[1].desc != NONEXISTENT 1212 && (same_special_file (&cmp.file[0].stat, &cmp.file[1].stat) 1213 || (0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) 1214 && same_file_attributes (&cmp.file[0].stat, 1215 &cmp.file[1].stat))))) 1216 && no_diff_means_no_output) 1217 { 1218 /* The two named files are actually the same physical file. 1219 We know they are identical without actually reading them. */ 1220 } 1221 else if (DIR_P (0) & DIR_P (1)) 1222 { 1223 if (output_style == OUTPUT_IFDEF) 1224 fatal ("-D option not supported with directories"); 1225 1226 /* If both are directories, compare the files in them. */ 1227 1228 if (parent && !recursive) 1229 { 1230 /* But don't compare dir contents one level down 1231 unless -r was specified. 1232 See POSIX 1003.1-2001 for this format. */ 1233 message ("Common subdirectories: %s and %s\n", 1234 cmp.file[0].name, cmp.file[1].name); 1235 } 1236 else 1237 status = diff_dirs (&cmp, compare_files); 1238 } 1239 else if ((DIR_P (0) | DIR_P (1)) 1240 || (parent 1241 && (! S_ISREG (cmp.file[0].stat.st_mode) 1242 || ! S_ISREG (cmp.file[1].stat.st_mode)))) 1243 { 1244 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) 1245 { 1246 /* We have a subdirectory that exists only in one directory. */ 1247 1248 if ((DIR_P (0) | DIR_P (1)) 1249 && recursive 1250 && (new_file 1251 || (unidirectional_new_file 1252 && cmp.file[0].desc == NONEXISTENT))) 1253 status = diff_dirs (&cmp, compare_files); 1254 else 1255 { 1256 char const *dir 1257 = parent->file[cmp.file[0].desc == NONEXISTENT].name; 1258 1259 /* See POSIX 1003.1-2001 for this format. */ 1260 message ("Only in %s: %s\n", dir, name0); 1261 1262 status = EXIT_FAILURE; 1263 } 1264 } 1265 else 1266 { 1267 /* We have two files that are not to be compared. */ 1268 1269 /* See POSIX 1003.1-2001 for this format. 
*/ 1270 message5 ("File %s is a %s while file %s is a %s\n", 1271 file_label[0] ? file_label[0] : cmp.file[0].name, 1272 filetype (&cmp.file[0].stat), 1273 file_label[1] ? file_label[1] : cmp.file[1].name, 1274 filetype (&cmp.file[1].stat)); 1275 1276 /* This is a difference. */ 1277 status = EXIT_FAILURE; 1278 } 1279 } 1280 else if (files_can_be_treated_as_binary 1281 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size 1282 && (cmp.file[0].desc == NONEXISTENT 1283 || S_ISREG (cmp.file[0].stat.st_mode)) 1284 && (cmp.file[1].desc == NONEXISTENT 1285 || S_ISREG (cmp.file[1].stat.st_mode))) 1286 { 1287 message ("Files %s and %s differ\n", 1288 file_label[0] ? file_label[0] : cmp.file[0].name, 1289 file_label[1] ? file_label[1] : cmp.file[1].name); 1290 status = EXIT_FAILURE; 1291 } 1292 else 1293 { 1294 /* Both exist and neither is a directory. */ 1295 1296 /* Open the files and record their descriptors. */ 1297 1298 if (cmp.file[0].desc == UNOPENED) 1299 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0) 1300 { 1301 perror_with_name (cmp.file[0].name); 1302 status = EXIT_TROUBLE; 1303 } 1304 if (cmp.file[1].desc == UNOPENED) 1305 { 1306 if (same_files) 1307 cmp.file[1].desc = cmp.file[0].desc; 1308 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0)) 1309 < 0) 1310 { 1311 perror_with_name (cmp.file[1].name); 1312 status = EXIT_TROUBLE; 1313 } 1314 } 1315 1316#if HAVE_SETMODE_DOS 1317 if (binary) 1318 for (f = 0; f < 2; f++) 1319 if (0 <= cmp.file[f].desc) 1320 set_binary_mode (cmp.file[f].desc, 1); 1321#endif 1322 1323 /* Compare the files, if no error was found. */ 1324 1325 if (status == EXIT_SUCCESS) 1326 status = diff_2_files (&cmp); 1327 1328 /* Close the file descriptors. 
*/ 1329 1330 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) 1331 { 1332 perror_with_name (cmp.file[0].name); 1333 status = EXIT_TROUBLE; 1334 } 1335 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc 1336 && close (cmp.file[1].desc) != 0) 1337 { 1338 perror_with_name (cmp.file[1].name); 1339 status = EXIT_TROUBLE; 1340 } 1341 } 1342 1343 /* Now the comparison has been done, if no error prevented it, 1344 and STATUS is the value this function will return. */ 1345 1346 if (status == EXIT_SUCCESS) 1347 { 1348 if (report_identical_files && !DIR_P (0)) 1349 message ("Files %s and %s are identical\n", 1350 file_label[0] ? file_label[0] : cmp.file[0].name, 1351 file_label[1] ? file_label[1] : cmp.file[1].name); 1352 } 1353 else 1354 { 1355 /* Flush stdout so that the user sees differences immediately. 1356 This can hurt performance, unfortunately. */ 1357 if (fflush (stdout) != 0) 1358 pfatal_with_name (_("standard output")); 1359 } 1360 1361 if (free0) 1362 free (free0); 1363 if (free1) 1364 free (free1); 1365 1366 return status; 1367} 1368