1/* diff - compare files line by line 2 3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002 4 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 GNU DIFF is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 GNU DIFF is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 16 See the GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GNU DIFF; see the file COPYING. 20 If not, write to the Free Software Foundation, 21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 23#define GDIFF_MAIN 24#include "diff.h" 25#include <c-stack.h> 26#include <dirname.h> 27#include <error.h> 28#include <exclude.h> 29#include <exitfail.h> 30#include <fnmatch.h> 31#include <freesoft.h> 32#include <getopt.h> 33#include <hard-locale.h> 34#include <prepargs.h> 35#include <quotesys.h> 36#include <regex.h> 37#include <setmode.h> 38#include <xalloc.h> 39 40static char const authorship_msgid[] = 41 N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\ 42Richard Stallman, and Len Tower."); 43 44static char const copyright_string[] = 45 "Copyright (C) 2002 Free Software Foundation, Inc."; 46 47#ifndef GUTTER_WIDTH_MINIMUM 48# define GUTTER_WIDTH_MINIMUM 3 49#endif 50 51struct regexp_list 52{ 53 char *regexps; /* chars representing disjunction of the regexps */ 54 size_t len; /* chars used in `regexps' */ 55 size_t size; /* size malloc'ed for `regexps'; 0 if not malloc'ed */ 56 bool multiple_regexps;/* Does `regexps' represent a disjunction? */ 57 struct re_pattern_buffer *buf; 58}; 59 60static int compare_files (struct comparison const *, char const *, char const *); 61static void add_regexp (struct regexp_list *, char const *); 62static void summarize_regexp_list (struct regexp_list *); 63static void specify_style (enum output_style); 64static void specify_value (char const **, char const *, char const *); 65static void try_help (char const *, char const *) __attribute__((noreturn)); 66static void check_stdout (void); 67static void usage (void); 68 69/* If comparing directories, compare their common subdirectories 70 recursively. */ 71static bool recursive; 72 73/* In context diffs, show previous lines that match these regexps. */ 74static struct regexp_list function_regexp_list; 75 76/* Ignore changes affecting only lines that match these regexps. */ 77static struct regexp_list ignore_regexp_list; 78 79#if HAVE_SETMODE_DOS 80/* Use binary I/O when reading and writing data (--binary). 81 On POSIX hosts, this has no effect. */ 82static bool binary; 83#endif 84 85/* When comparing directories, if a file appears only in one 86 directory, treat it as present but empty in the other (-N). 87 Then `patch' would create the file with appropriate contents. */ 88static bool new_file; 89 90/* When comparing directories, if a file appears only in the second 91 directory of the two, treat it as present but empty in the other 92 (--unidirectional-new-file). 93 Then `patch' would create the file with appropriate contents. */ 94static bool unidirectional_new_file; 95 96/* Report files compared that are the same (-s). 97 Normally nothing is output when that happens. */ 98static bool report_identical_files; 99 100 101/* Return a string containing the command options with which diff was invoked. 102 Spaces appear between what were separate ARGV-elements. 103 There is a space at the beginning but none at the end. 104 If there were no options, the result is an empty string. 105 106 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 107 the length of that vector. */ 108 109static char * 110option_list (char **optionvec, int count) 111{ 112 int i; 113 size_t size = 1; 114 char *result; 115 char *p; 116 117 for (i = 0; i < count; i++) 118 size += 1 + quote_system_arg ((char *) 0, optionvec[i]); 119 120 p = result = xmalloc (size); 121 122 for (i = 0; i < count; i++) 123 { 124 *p++ = ' '; 125 p += quote_system_arg (p, optionvec[i]); 126 } 127 128 *p = 0; 129 return result; 130} 131 132 133/* Return an option value suitable for add_exclude. */ 134 135static int 136exclude_options (void) 137{ 138 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0); 139} 140 141static char const shortopts[] = 142"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y"; 143 144/* Values for long options that do not have single-letter equivalents. */ 145enum 146{ 147 BINARY_OPTION = CHAR_MAX + 1, 148 FROM_FILE_OPTION, 149 HELP_OPTION, 150 HORIZON_LINES_OPTION, 151 IGNORE_FILE_NAME_CASE_OPTION, 152 INHIBIT_HUNK_MERGE_OPTION, 153 LEFT_COLUMN_OPTION, 154 LINE_FORMAT_OPTION, 155 NO_IGNORE_FILE_NAME_CASE_OPTION, 156 NORMAL_OPTION, 157 SDIFF_MERGE_ASSIST_OPTION, 158 STRIP_TRAILING_CR_OPTION, 159 SUPPRESS_COMMON_LINES_OPTION, 160 TO_FILE_OPTION, 161 162 /* These options must be in sequence. */ 163 UNCHANGED_LINE_FORMAT_OPTION, 164 OLD_LINE_FORMAT_OPTION, 165 NEW_LINE_FORMAT_OPTION, 166 167 /* These options must be in sequence. */ 168 UNCHANGED_GROUP_FORMAT_OPTION, 169 OLD_GROUP_FORMAT_OPTION, 170 NEW_GROUP_FORMAT_OPTION, 171 CHANGED_GROUP_FORMAT_OPTION 172}; 173 174static char const group_format_option[][sizeof "--unchanged-group-format"] = 175 { 176 "--unchanged-group-format", 177 "--old-group-format", 178 "--new-group-format", 179 "--changed-group-format" 180 }; 181 182static char const line_format_option[][sizeof "--unchanged-line-format"] = 183 { 184 "--unchanged-line-format", 185 "--old-line-format", 186 "--new-line-format" 187 }; 188 189static struct option const longopts[] = 190{ 191 {"binary", 0, 0, BINARY_OPTION}, 192 {"brief", 0, 0, 'q'}, 193 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION}, 194 {"context", 2, 0, 'C'}, 195 {"ed", 0, 0, 'e'}, 196 {"exclude", 1, 0, 'x'}, 197 {"exclude-from", 1, 0, 'X'}, 198 {"expand-tabs", 0, 0, 't'}, 199 {"forward-ed", 0, 0, 'f'}, 200 {"from-file", 1, 0, FROM_FILE_OPTION}, 201 {"help", 0, 0, HELP_OPTION}, 202 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION}, 203 {"ifdef", 1, 0, 'D'}, 204 {"ignore-all-space", 0, 0, 'w'}, 205 {"ignore-blank-lines", 0, 0, 'B'}, 206 {"ignore-case", 0, 0, 'i'}, 207 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, 208 {"ignore-matching-lines", 1, 0, 'I'}, 209 {"ignore-space-change", 0, 0, 'b'}, 210 {"ignore-tab-expansion", 0, 0, 'E'}, 211 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, 212 {"initial-tab", 0, 0, 'T'}, 213 {"label", 1, 0, 'L'}, 214 {"left-column", 0, 0, LEFT_COLUMN_OPTION}, 215 {"line-format", 1, 0, LINE_FORMAT_OPTION}, 216 {"minimal", 0, 0, 'd'}, 217 {"new-file", 0, 0, 'N'}, 218 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, 219 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, 220 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, 221 {"normal", 0, 0, NORMAL_OPTION}, 222 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, 223 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION}, 224 {"paginate", 0, 0, 'l'}, 225 {"rcs", 0, 0, 'n'}, 226 {"recursive", 0, 0, 'r'}, 227 {"report-identical-files", 0, 0, 's'}, 228 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, 229 {"show-c-function", 0, 0, 'p'}, 230 {"show-function-line", 1, 0, 'F'}, 231 {"side-by-side", 0, 0, 'y'}, 232 {"speed-large-files", 0, 0, 'H'}, 233 {"starting-file", 1, 0, 'S'}, 234 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, 235 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, 236 {"text", 0, 0, 'a'}, 237 {"to-file", 1, 0, TO_FILE_OPTION}, 238 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, 239 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, 240 {"unidirectional-new-file", 0, 0, 'P'}, 241 {"unified", 2, 0, 'U'}, 242 {"version", 0, 0, 'v'}, 243 {"width", 1, 0, 'W'}, 244 {0, 0, 0, 0} 245}; 246 247int 248main (int argc, char **argv) 249{ 250 int exit_status = EXIT_SUCCESS; 251 int c; 252 int i; 253 int prev = -1; 254 lin ocontext = -1; 255 bool explicit_context = 0; 256 int width = 0; 257 bool show_c_function = 0; 258 char const *from_file = 0; 259 char const *to_file = 0; 260 uintmax_t numval; 261 char *numend; 262 263 /* Do our initializations. */ 264 exit_failure = 2; 265 initialize_main (&argc, &argv); 266 program_name = argv[0]; 267 setlocale (LC_ALL, ""); 268 bindtextdomain (PACKAGE, LOCALEDIR); 269 textdomain (PACKAGE); 270 c_stack_action (c_stack_die); 271 function_regexp_list.buf = &function_regexp; 272 ignore_regexp_list.buf = &ignore_regexp; 273 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); 274 excluded = new_exclude (); 275 276 /* Decode the options. */ 277 278 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) 279 { 280 switch (c) 281 { 282 case 0: 283 break; 284 285 case '0': 286 case '1': 287 case '2': 288 case '3': 289 case '4': 290 case '5': 291 case '6': 292 case '7': 293 case '8': 294 case '9': 295 if (! ISDIGIT (prev)) 296 ocontext = c - '0'; 297 else if (LIN_MAX / 10 < ocontext 298 || ((ocontext = 10 * ocontext + c - '0') < 0)) 299 ocontext = LIN_MAX; 300 break; 301 302 case 'a': 303 text = 1; 304 break; 305 306 case 'b': 307 if (ignore_white_space < IGNORE_SPACE_CHANGE) 308 ignore_white_space = IGNORE_SPACE_CHANGE; 309 break; 310 311 case 'B': 312 ignore_blank_lines = 1; 313 break; 314 315 case 'C': /* +context[=lines] */ 316 case 'U': /* +unified[=lines] */ 317 { 318 if (optarg) 319 { 320 numval = strtoumax (optarg, &numend, 10); 321 if (*numend) 322 try_help ("invalid context length `%s'", optarg); 323 if (LIN_MAX < numval) 324 numval = LIN_MAX; 325 } 326 else 327 numval = 3; 328 329 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); 330 if (context < numval) 331 context = numval; 332 explicit_context = 1; 333 } 334 break; 335 336 case 'c': 337 specify_style (OUTPUT_CONTEXT); 338 if (context < 3) 339 context = 3; 340 break; 341 342 case 'd': 343 minimal = 1; 344 break; 345 346 case 'D': 347 specify_style (OUTPUT_IFDEF); 348 { 349 static char const C_ifdef_group_formats[] = 350 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 351 char *b = xmalloc (sizeof C_ifdef_group_formats 352 + 7 * strlen (optarg) - 14 /* 7*"%s" */ 353 - 8 /* 5*"%%" + 3*"%c" */); 354 sprintf (b, C_ifdef_group_formats, 355 0, 356 optarg, optarg, 0, 357 optarg, optarg, 0, 358 optarg, optarg, optarg); 359 for (i = 0; i < sizeof group_format / sizeof *group_format; i++) 360 { 361 specify_value (&group_format[i], b, "-D"); 362 b += strlen (b) + 1; 363 } 364 } 365 break; 366 367 case 'e': 368 specify_style (OUTPUT_ED); 369 break; 370 371 case 'E': 372 if (ignore_white_space < IGNORE_TAB_EXPANSION) 373 ignore_white_space = IGNORE_TAB_EXPANSION; 374 break; 375 376 case 'f': 377 specify_style (OUTPUT_FORWARD_ED); 378 break; 379 380 case 'F': 381 add_regexp (&function_regexp_list, optarg); 382 break; 383 384 case 'h': 385 /* Split the files into chunks for faster processing. 386 Usually does not change the result. 387 388 This currently has no effect. */ 389 break; 390 391 case 'H': 392 speed_large_files = 1; 393 break; 394 395 case 'i': 396 ignore_case = 1; 397 break; 398 399 case 'I': 400 add_regexp (&ignore_regexp_list, optarg); 401 break; 402 403 case 'l': 404 if (!pr_program[0]) 405 try_help ("pagination not supported on this host", 0); 406 paginate = 1; 407#ifdef SIGCHLD 408 /* Pagination requires forking and waiting, and 409 System V fork+wait does not work if SIGCHLD is ignored. */ 410 signal (SIGCHLD, SIG_DFL); 411#endif 412 break; 413 414 case 'L': 415 if (!file_label[0]) 416 file_label[0] = optarg; 417 else if (!file_label[1]) 418 file_label[1] = optarg; 419 else 420 fatal ("too many file label options"); 421 break; 422 423 case 'n': 424 specify_style (OUTPUT_RCS); 425 break; 426 427 case 'N': 428 new_file = 1; 429 break; 430 431 case 'p': 432 show_c_function = 1; 433 add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); 434 break; 435 436 case 'P': 437 unidirectional_new_file = 1; 438 break; 439 440 case 'q': 441 brief = 1; 442 break; 443 444 case 'r': 445 recursive = 1; 446 break; 447 448 case 's': 449 report_identical_files = 1; 450 break; 451 452 case 'S': 453 specify_value (&starting_file, optarg, "-S"); 454 break; 455 456 case 't': 457 expand_tabs = 1; 458 break; 459 460 case 'T': 461 initial_tab = 1; 462 break; 463 464 case 'u': 465 specify_style (OUTPUT_UNIFIED); 466 if (context < 3) 467 context = 3; 468 break; 469 470 case 'v': 471 printf ("diff %s\n%s\n\n%s\n\n%s\n", 472 version_string, copyright_string, 473 _(free_software_msgid), _(authorship_msgid)); 474 check_stdout (); 475 return EXIT_SUCCESS; 476 477 case 'w': 478 ignore_white_space = IGNORE_ALL_SPACE; 479 break; 480 481 case 'x': 482 add_exclude (excluded, optarg, exclude_options ()); 483 break; 484 485 case 'X': 486 if (add_exclude_file (add_exclude, excluded, optarg, 487 exclude_options (), '\n')) 488 pfatal_with_name (optarg); 489 break; 490 491 case 'y': 492 specify_style (OUTPUT_SDIFF); 493 break; 494 495 case 'W': 496 numval = strtoumax (optarg, &numend, 10); 497 if (! (0 < numval && numval <= INT_MAX) || *numend) 498 try_help ("invalid width `%s'", optarg); 499 if (width != numval) 500 { 501 if (width) 502 fatal ("conflicting width options"); 503 width = numval; 504 } 505 break; 506 507 case BINARY_OPTION: 508#if HAVE_SETMODE_DOS 509 binary = 1; 510 set_binary_mode (STDOUT_FILENO, 1); 511#endif 512 break; 513 514 case FROM_FILE_OPTION: 515 specify_value (&from_file, optarg, "--from-file"); 516 break; 517 518 case HELP_OPTION: 519 usage (); 520 check_stdout (); 521 return EXIT_SUCCESS; 522 523 case HORIZON_LINES_OPTION: 524 numval = strtoumax (optarg, &numend, 10); 525 if (*numend) 526 try_help ("invalid horizon length `%s'", optarg); 527 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); 528 break; 529 530 case IGNORE_FILE_NAME_CASE_OPTION: 531 ignore_file_name_case = 1; 532 break; 533 534 case INHIBIT_HUNK_MERGE_OPTION: 535 /* This option is obsolete, but accept it for backward 536 compatibility. */ 537 break; 538 539 case LEFT_COLUMN_OPTION: 540 left_column = 1; 541 break; 542 543 case LINE_FORMAT_OPTION: 544 specify_style (OUTPUT_IFDEF); 545 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 546 specify_value (&line_format[i], optarg, "--line-format"); 547 break; 548 549 case NO_IGNORE_FILE_NAME_CASE_OPTION: 550 ignore_file_name_case = 0; 551 break; 552 553 case NORMAL_OPTION: 554 specify_style (OUTPUT_NORMAL); 555 break; 556 557 case SDIFF_MERGE_ASSIST_OPTION: 558 specify_style (OUTPUT_SDIFF); 559 sdiff_merge_assist = 1; 560 break; 561 562 case STRIP_TRAILING_CR_OPTION: 563 strip_trailing_cr = 1; 564 break; 565 566 case SUPPRESS_COMMON_LINES_OPTION: 567 suppress_common_lines = 1; 568 break; 569 570 case TO_FILE_OPTION: 571 specify_value (&to_file, optarg, "--to-file"); 572 break; 573 574 case UNCHANGED_LINE_FORMAT_OPTION: 575 case OLD_LINE_FORMAT_OPTION: 576 case NEW_LINE_FORMAT_OPTION: 577 specify_style (OUTPUT_IFDEF); 578 c -= UNCHANGED_LINE_FORMAT_OPTION; 579 specify_value (&line_format[c], optarg, line_format_option[c]); 580 break; 581 582 case UNCHANGED_GROUP_FORMAT_OPTION: 583 case OLD_GROUP_FORMAT_OPTION: 584 case NEW_GROUP_FORMAT_OPTION: 585 case CHANGED_GROUP_FORMAT_OPTION: 586 specify_style (OUTPUT_IFDEF); 587 c -= UNCHANGED_GROUP_FORMAT_OPTION; 588 specify_value (&group_format[c], optarg, group_format_option[c]); 589 break; 590 591 default: 592 try_help (0, 0); 593 } 594 prev = c; 595 } 596 597 if (output_style == OUTPUT_UNSPECIFIED) 598 { 599 if (show_c_function) 600 { 601 specify_style (OUTPUT_CONTEXT); 602 if (ocontext < 0) 603 context = 3; 604 } 605 else 606 specify_style (OUTPUT_NORMAL); 607 } 608 609 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) 610 time_format = "%Y-%m-%d %H:%M:%S.%N %z"; 611 else 612 { 613 /* See POSIX 1003.1-2001 for this format. */ 614 time_format = "%a %b %e %T %Y"; 615 } 616 617 if (0 <= ocontext) 618 { 619 bool modern_usage = 200112 <= posix2_version (); 620 621 if ((output_style == OUTPUT_CONTEXT 622 || output_style == OUTPUT_UNIFIED) 623 && (context < ocontext 624 || (ocontext < context && ! explicit_context))) 625 { 626 if (modern_usage) 627 { 628 error (0, 0, 629 _("`-%ld' option is obsolete; use `-%c %ld'"), 630 (long) ocontext, 631 output_style == OUTPUT_CONTEXT ? 'C' : 'U', 632 (long) ocontext); 633 try_help (0, 0); 634 } 635 context = ocontext; 636 } 637 else 638 { 639 if (modern_usage) 640 { 641 error (0, 0, _("`-%ld' option is obsolete; omit it"), 642 (long) ocontext); 643 try_help (0, 0); 644 } 645 } 646 } 647 648 { 649 /* 650 * We maximize first the half line width, and then the gutter width, 651 * according to the following constraints: 652 * 1. Two half lines plus a gutter must fit in a line. 653 * 2. If the half line width is nonzero: 654 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 655 * b. If tabs are not expanded to spaces, 656 * a half line plus a gutter is an integral number of tabs, 657 * so that tabs in the right column line up. 658 */ 659 unsigned int t = expand_tabs ? 1 : TAB_WIDTH; 660 int w = width ? width : 130; 661 int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t; 662 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)), 663 sdiff_column2_offset = sdiff_half_width ? off : w; 664 } 665 666 /* Make the horizon at least as large as the context, so that 667 shift_boundaries has more freedom to shift the first and last hunks. */ 668 if (horizon_lines < context) 669 horizon_lines = context; 670 671 summarize_regexp_list (&function_regexp_list); 672 summarize_regexp_list (&ignore_regexp_list); 673 674 if (output_style == OUTPUT_IFDEF) 675 { 676 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 677 if (!line_format[i]) 678 line_format[i] = "%l\n"; 679 if (!group_format[OLD]) 680 group_format[OLD] 681 = group_format[CHANGED] ? group_format[CHANGED] : "%<"; 682 if (!group_format[NEW]) 683 group_format[NEW] 684 = group_format[CHANGED] ? group_format[CHANGED] : "%>"; 685 if (!group_format[UNCHANGED]) 686 group_format[UNCHANGED] = "%="; 687 if (!group_format[CHANGED]) 688 group_format[CHANGED] = concat (group_format[OLD], 689 group_format[NEW], ""); 690 } 691 692 no_diff_means_no_output = 693 (output_style == OUTPUT_IFDEF ? 694 (!*group_format[UNCHANGED] 695 || (strcmp (group_format[UNCHANGED], "%=") == 0 696 && !*line_format[UNCHANGED])) 697 : (output_style != OUTPUT_SDIFF) | suppress_common_lines); 698 699 files_can_be_treated_as_binary = 700 (brief 701 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr 702 | (ignore_regexp_list.regexps || ignore_white_space))); 703 704 switch_string = option_list (argv + 1, optind - 1); 705 706 if (from_file) 707 { 708 if (to_file) 709 fatal ("--from-file and --to-file both specified"); 710 else 711 for (; optind < argc; optind++) 712 { 713 int status = compare_files ((struct comparison *) 0, 714 from_file, argv[optind]); 715 if (exit_status < status) 716 exit_status = status; 717 } 718 } 719 else 720 { 721 if (to_file) 722 for (; optind < argc; optind++) 723 { 724 int status = compare_files ((struct comparison *) 0, 725 argv[optind], to_file); 726 if (exit_status < status) 727 exit_status = status; 728 } 729 else 730 { 731 if (argc - optind != 2) 732 { 733 if (argc - optind < 2) 734 try_help ("missing operand after `%s'", argv[argc - 1]); 735 else 736 try_help ("extra operand `%s'", argv[optind + 2]); 737 } 738 739 exit_status = compare_files ((struct comparison *) 0, 740 argv[optind], argv[optind + 1]); 741 } 742 } 743 744 /* Print any messages that were saved up for last. */ 745 print_message_queue (); 746 747 check_stdout (); 748 exit (exit_status); 749 return exit_status; 750} 751 752/* Append to REGLIST the regexp PATTERN. */ 753 754static void 755add_regexp (struct regexp_list *reglist, char const *pattern) 756{ 757 size_t patlen = strlen (pattern); 758 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 759 760 if (m != 0) 761 error (0, 0, "%s: %s", pattern, m); 762 else 763 { 764 char *regexps = reglist->regexps; 765 size_t len = reglist->len; 766 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 767 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 768 size_t size = reglist->size; 769 770 if (size <= newlen) 771 { 772 if (!size) 773 size = 1; 774 775 do size *= 2; 776 while (size <= newlen); 777 778 reglist->size = size; 779 reglist->regexps = regexps = xrealloc (regexps, size); 780 } 781 if (multiple_regexps) 782 { 783 regexps[len++] = '\\'; 784 regexps[len++] = '|'; 785 } 786 memcpy (regexps + len, pattern, patlen + 1); 787 } 788} 789 790/* Ensure that REGLIST represents the disjunction of its regexps. 791 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 792 793static void 794summarize_regexp_list (struct regexp_list *reglist) 795{ 796 if (reglist->regexps) 797 { 798 /* At least one regexp was specified. Allocate a fastmap for it. */ 799 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 800 if (reglist->multiple_regexps) 801 { 802 /* Compile the disjunction of the regexps. 803 (If just one regexp was specified, it is already compiled.) */ 804 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 805 reglist->buf); 806 if (m != 0) 807 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 808 } 809 } 810} 811 812static void 813try_help (char const *reason_msgid, char const *operand) 814{ 815 if (reason_msgid) 816 error (0, 0, _(reason_msgid), operand); 817 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 818 program_name); 819 abort (); 820} 821 822static void 823check_stdout (void) 824{ 825 if (ferror (stdout)) 826 fatal ("write failed"); 827 else if (fclose (stdout) != 0) 828 pfatal_with_name (_("standard output")); 829} 830 831static char const * const option_help_msgid[] = { 832 N_("Compare files line by line."), 833 "", 834 N_("-i --ignore-case Ignore case differences in file contents."), 835 N_("--ignore-file-name-case Ignore case when comparing file names."), 836 N_("--no-ignore-file-name-case Consider case when comparing file names."), 837 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 838 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 839 N_("-w --ignore-all-space Ignore all white space."), 840 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 841 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 842 N_("--strip-trailing-cr Strip trailing carriage return on input."), 843#if HAVE_SETMODE_DOS 844 N_("--binary Read and write data in binary mode."), 845#endif 846 N_("-a --text Treat all files as text."), 847 "", 848 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 849-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 850 --label LABEL Use LABEL instead of file name.\n\ 851 -p --show-c-function Show which C function each change is in.\n\ 852 -F RE --show-function-line=RE Show the most recent line matching RE."), 853 N_("-q --brief Output only whether files differ."), 854 N_("-e --ed Output an ed script."), 855 N_("--normal Output a normal diff."), 856 N_("-n --rcs Output an RCS format diff."), 857 N_("-y --side-by-side Output in two columns.\n\ 858 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 859 --left-column Output only the left column of common lines.\n\ 860 --suppress-common-lines Do not output common lines."), 861 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 862 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 863 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 864 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 865 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 866 N_(" GFMT may contain:\n\ 867 %< lines from FILE1\n\ 868 %> lines from FILE2\n\ 869 %= lines common to FILE1 and FILE2\n\ 870 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 871 LETTERs are as follows for new group, lower case for old group:\n\ 872 F first line number\n\ 873 L last line number\n\ 874 N number of lines = L-F+1\n\ 875 E F-1\n\ 876 M L+1"), 877 N_(" LFMT may contain:\n\ 878 %L contents of line\n\ 879 %l contents of line, excluding any trailing newline\n\ 880 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 881 N_(" Either GFMT or LFMT may contain:\n\ 882 %% %\n\ 883 %c'C' the single character C\n\ 884 %c'\\OOO' the character with octal code OOO"), 885 "", 886 N_("-l --paginate Pass the output through `pr' to paginate it."), 887 N_("-t --expand-tabs Expand tabs to spaces in output."), 888 N_("-T --initial-tab Make tabs line up by prepending a tab."), 889 "", 890 N_("-r --recursive Recursively compare any subdirectories found."), 891 N_("-N --new-file Treat absent files as empty."), 892 N_("--unidirectional-new-file Treat absent first files as empty."), 893 N_("-s --report-identical-files Report when two files are the same."), 894 N_("-x PAT --exclude=PAT Exclude files that match PAT."), 895 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 896 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 897 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 898 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 899 "", 900 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 901 N_("-d --minimal Try hard to find a smaller set of changes."), 902 N_("--speed-large-files Assume large files and many scattered small changes."), 903 "", 904 N_("-v --version Output version info."), 905 N_("--help Output this help."), 906 "", 907 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 908 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 909 N_("If a FILE is `-', read standard input."), 910 "", 911 N_("Report bugs to <bug-gnu-utils@gnu.org>."), 912 0 913}; 914 915static void 916usage (void) 917{ 918 char const * const *p; 919 920 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 921 922 for (p = option_help_msgid; *p; p++) 923 { 924 if (!**p) 925 putchar ('\n'); 926 else 927 { 928 char const *msg = _(*p); 929 char const *nl; 930 while ((nl = strchr (msg, '\n'))) 931 { 932 int msglen = nl + 1 - msg; 933 printf (" %.*s", msglen, msg); 934 msg = nl + 1; 935 } 936 937 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 938 } 939 } 940} 941 942/* Set VAR to VALUE, reporting an OPTION error if this is a 943 conflict. */ 944static void 945specify_value (char const **var, char const *value, char const *option) 946{ 947 if (*var && strcmp (*var, value) != 0) 948 { 949 error (0, 0, _("conflicting %s option value `%s'"), option, value); 950 try_help (0, 0); 951 } 952 *var = value; 953} 954 955/* Set the output style to STYLE, diagnosing conflicts. */ 956static void 957specify_style (enum output_style style) 958{ 959 if (output_style != style) 960 { 961 if (output_style != OUTPUT_UNSPECIFIED) 962 try_help ("conflicting output style options", 0); 963 output_style = style; 964 } 965} 966 967static char const * 968filetype (struct stat const *st) 969{ 970 /* See POSIX 1003.1-2001 for these formats. 971 972 To keep diagnostics grammatical in English, the returned string 973 must start with a consonant. */ 974 975 if (S_ISREG (st->st_mode)) 976 return st->st_size == 0 ? _("regular empty file") : _("regular file"); 977 978 if (S_ISDIR (st->st_mode)) return _("directory"); 979 980#ifdef S_ISBLK 981 if (S_ISBLK (st->st_mode)) return _("block special file"); 982#endif 983#ifdef S_ISCHR 984 if (S_ISCHR (st->st_mode)) return _("character special file"); 985#endif 986#ifdef S_ISFIFO 987 if (S_ISFIFO (st->st_mode)) return _("fifo"); 988#endif 989 /* S_ISLNK is impossible with `fstat' and `stat'. */ 990#ifdef S_ISSOCK 991 if (S_ISSOCK (st->st_mode)) return _("socket"); 992#endif 993#ifdef S_TYPEISMQ 994 if (S_TYPEISMQ (st)) return _("message queue"); 995#endif 996#ifdef S_TYPEISSEM 997 if (S_TYPEISSEM (st)) return _("semaphore"); 998#endif 999#ifdef S_TYPEISSHM 1000 if (S_TYPEISSHM (st)) return _("shared memory object"); 1001#endif 1002#ifdef S_TYPEISTMO 1003 if (S_TYPEISTMO (st)) return _("typed memory object"); 1004#endif 1005 1006 return _("weird file"); 1007} 1008 1009/* Set the last-modified time of *ST to be the current time. */ 1010 1011static void 1012set_mtime_to_now (struct stat *st) 1013{ 1014#ifdef ST_MTIM_NSEC 1015 1016# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME 1017 if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0) 1018 return; 1019# endif 1020 1021# if HAVE_GETTIMEOFDAY 1022 { 1023 struct timeval timeval; 1024 if (gettimeofday (&timeval, NULL) == 0) 1025 { 1026 st->st_mtime = timeval.tv_sec; 1027 st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000; 1028 return; 1029 } 1030 } 1031# endif 1032 1033#endif /* ST_MTIM_NSEC */ 1034 1035 time (&st->st_mtime); 1036} 1037 1038/* Compare two files (or dirs) with parent comparison PARENT 1039 and names NAME0 and NAME1. 1040 (If PARENT is 0, then the first name is just NAME0, etc.) 1041 This is self-contained; it opens the files and closes them. 1042 1043 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1044 different, EXIT_TROUBLE if there is a problem opening them. */ 1045 1046static int 1047compare_files (struct comparison const *parent, 1048 char const *name0, 1049 char const *name1) 1050{ 1051 struct comparison cmp; 1052#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) 1053 register int f; 1054 int status = EXIT_SUCCESS; 1055 bool same_files; 1056 char *free0, *free1; 1057 1058 /* If this is directory comparison, perhaps we have a file 1059 that exists only in one of the directories. 1060 If so, just print a message to that effect. */ 1061 1062 if (! ((name0 && name1) 1063 || (unidirectional_new_file && name1) 1064 || new_file)) 1065 { 1066 char const *name = name0 == 0 ? name1 : name0; 1067 char const *dir = parent->file[name0 == 0].name; 1068 1069 /* See POSIX 1003.1-2001 for this format. */ 1070 message ("Only in %s: %s\n", dir, name); 1071 1072 /* Return EXIT_FAILURE so that diff_dirs will return 1073 EXIT_FAILURE ("some files differ"). */ 1074 return EXIT_FAILURE; 1075 } 1076 1077 memset (cmp.file, 0, sizeof cmp.file); 1078 cmp.parent = parent; 1079 1080 /* cmp.file[f].desc markers */ 1081#define NONEXISTENT (-1) /* nonexistent file */ 1082#define UNOPENED (-2) /* unopened file (e.g. directory) */ 1083#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ 1084 1085#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ 1086 1087 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED; 1088 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED; 1089 1090 /* Now record the full name of each file, including nonexistent ones. */ 1091 1092 if (name0 == 0) 1093 name0 = name1; 1094 if (name1 == 0) 1095 name1 = name0; 1096 1097 if (!parent) 1098 { 1099 free0 = 0; 1100 free1 = 0; 1101 cmp.file[0].name = name0; 1102 cmp.file[1].name = name1; 1103 } 1104 else 1105 { 1106 cmp.file[0].name = free0 1107 = dir_file_pathname (parent->file[0].name, name0); 1108 cmp.file[1].name = free1 1109 = dir_file_pathname (parent->file[1].name, name1); 1110 } 1111 1112 /* Stat the files. */ 1113 1114 for (f = 0; f < 2; f++) 1115 { 1116 if (cmp.file[f].desc != NONEXISTENT) 1117 { 1118 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) 1119 { 1120 cmp.file[f].desc = cmp.file[0].desc; 1121 cmp.file[f].stat = cmp.file[0].stat; 1122 } 1123 else if (strcmp (cmp.file[f].name, "-") == 0) 1124 { 1125 cmp.file[f].desc = STDIN_FILENO; 1126 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) 1127 cmp.file[f].desc = ERRNO_ENCODE (errno); 1128 else 1129 { 1130 if (S_ISREG (cmp.file[f].stat.st_mode)) 1131 { 1132 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR); 1133 if (pos < 0) 1134 cmp.file[f].desc = ERRNO_ENCODE (errno); 1135 else 1136 cmp.file[f].stat.st_size = 1137 MAX (0, cmp.file[f].stat.st_size - pos); 1138 } 1139 1140 /* POSIX 1003.1-2001 requires current time for 1141 stdin. */ 1142 set_mtime_to_now (&cmp.file[f].stat); 1143 } 1144 } 1145 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) 1146 cmp.file[f].desc = ERRNO_ENCODE (errno); 1147 } 1148 } 1149 1150 /* Mark files as nonexistent at the top level as needed for -N and 1151 --unidirectional-new-file. */ 1152 if (! parent) 1153 { 1154 if ((new_file | unidirectional_new_file) 1155 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT) 1156 && cmp.file[1].desc == UNOPENED) 1157 cmp.file[0].desc = NONEXISTENT; 1158 1159 if (new_file 1160 && cmp.file[0].desc == UNOPENED 1161 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT)) 1162 cmp.file[1].desc = NONEXISTENT; 1163 } 1164 1165 for (f = 0; f < 2; f++) 1166 if (cmp.file[f].desc == NONEXISTENT) 1167 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; 1168 1169 for (f = 0; f < 2; f++) 1170 { 1171 int e = ERRNO_DECODE (cmp.file[f].desc); 1172 if (0 <= e) 1173 { 1174 errno = e; 1175 perror_with_name (cmp.file[f].name); 1176 status = EXIT_TROUBLE; 1177 } 1178 } 1179 1180 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) 1181 { 1182 /* If one is a directory, and it was specified in the command line, 1183 use the file in that dir with the other file's basename. */ 1184 1185 int fnm_arg = DIR_P (0); 1186 int dir_arg = 1 - fnm_arg; 1187 char const *fnm = cmp.file[fnm_arg].name; 1188 char const *dir = cmp.file[dir_arg].name; 1189 char const *filename = cmp.file[dir_arg].name = free0 1190 = dir_file_pathname (dir, base_name (fnm)); 1191 1192 if (strcmp (fnm, "-") == 0) 1193 fatal ("cannot compare `-' to a directory"); 1194 1195 if (stat (filename, &cmp.file[dir_arg].stat) != 0) 1196 { 1197 perror_with_name (filename); 1198 status = EXIT_TROUBLE; 1199 } 1200 } 1201 1202 if (status != EXIT_SUCCESS) 1203 { 1204 /* One of the files should exist but does not. */ 1205 } 1206 else if ((same_files 1207 = (cmp.file[0].desc != NONEXISTENT 1208 && cmp.file[1].desc != NONEXISTENT 1209 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) 1210 && same_file_attributes (&cmp.file[0].stat, 1211 &cmp.file[1].stat))) 1212 && no_diff_means_no_output) 1213 { 1214 /* The two named files are actually the same physical file. 1215 We know they are identical without actually reading them. */ 1216 } 1217 else if (DIR_P (0) & DIR_P (1)) 1218 { 1219 if (output_style == OUTPUT_IFDEF) 1220 fatal ("-D option not supported with directories"); 1221 1222 /* If both are directories, compare the files in them. */ 1223 1224 if (parent && !recursive) 1225 { 1226 /* But don't compare dir contents one level down 1227 unless -r was specified. 1228 See POSIX 1003.1-2001 for this format. */ 1229 message ("Common subdirectories: %s and %s\n", 1230 cmp.file[0].name, cmp.file[1].name); 1231 } 1232 else 1233 status = diff_dirs (&cmp, compare_files); 1234 } 1235 else if ((DIR_P (0) | DIR_P (1)) 1236 || (parent 1237 && (! S_ISREG (cmp.file[0].stat.st_mode) 1238 || ! S_ISREG (cmp.file[1].stat.st_mode)))) 1239 { 1240 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) 1241 { 1242 /* We have a subdirectory that exists only in one directory. */ 1243 1244 if ((DIR_P (0) | DIR_P (1)) 1245 && recursive 1246 && (new_file 1247 || (unidirectional_new_file 1248 && cmp.file[0].desc == NONEXISTENT))) 1249 status = diff_dirs (&cmp, compare_files); 1250 else 1251 { 1252 char const *dir 1253 = parent->file[cmp.file[0].desc == NONEXISTENT].name; 1254 1255 /* See POSIX 1003.1-2001 for this format. */ 1256 message ("Only in %s: %s\n", dir, name0); 1257 1258 status = EXIT_FAILURE; 1259 } 1260 } 1261 else 1262 { 1263 /* We have two files that are not to be compared. */ 1264 1265 /* See POSIX 1003.1-2001 for this format. */ 1266 message5 ("File %s is a %s while file %s is a %s\n", 1267 file_label[0] ? file_label[0] : cmp.file[0].name, 1268 filetype (&cmp.file[0].stat), 1269 file_label[1] ? file_label[1] : cmp.file[1].name, 1270 filetype (&cmp.file[1].stat)); 1271 1272 /* This is a difference. */ 1273 status = EXIT_FAILURE; 1274 } 1275 } 1276 else if (files_can_be_treated_as_binary 1277 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size 1278 && (cmp.file[0].desc == NONEXISTENT 1279 || S_ISREG (cmp.file[0].stat.st_mode)) 1280 && (cmp.file[1].desc == NONEXISTENT 1281 || S_ISREG (cmp.file[1].stat.st_mode))) 1282 { 1283 message ("Files %s and %s differ\n", 1284 file_label[0] ? file_label[0] : cmp.file[0].name, 1285 file_label[1] ? file_label[1] : cmp.file[1].name); 1286 status = EXIT_FAILURE; 1287 } 1288 else 1289 { 1290 /* Both exist and neither is a directory. */ 1291 1292 /* Open the files and record their descriptors. */ 1293 1294 if (cmp.file[0].desc == UNOPENED) 1295 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0) 1296 { 1297 perror_with_name (cmp.file[0].name); 1298 status = EXIT_TROUBLE; 1299 } 1300 if (cmp.file[1].desc == UNOPENED) 1301 { 1302 if (same_files) 1303 cmp.file[1].desc = cmp.file[0].desc; 1304 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0)) 1305 < 0) 1306 { 1307 perror_with_name (cmp.file[1].name); 1308 status = EXIT_TROUBLE; 1309 } 1310 } 1311 1312#if HAVE_SETMODE_DOS 1313 if (binary) 1314 for (f = 0; f < 2; f++) 1315 if (0 <= cmp.file[f].desc) 1316 set_binary_mode (cmp.file[f].desc, 1); 1317#endif 1318 1319 /* Compare the files, if no error was found. */ 1320 1321 if (status == EXIT_SUCCESS) 1322 status = diff_2_files (&cmp); 1323 1324 /* Close the file descriptors. */ 1325 1326 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) 1327 { 1328 perror_with_name (cmp.file[0].name); 1329 status = EXIT_TROUBLE; 1330 } 1331 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc 1332 && close (cmp.file[1].desc) != 0) 1333 { 1334 perror_with_name (cmp.file[1].name); 1335 status = EXIT_TROUBLE; 1336 } 1337 } 1338 1339 /* Now the comparison has been done, if no error prevented it, 1340 and STATUS is the value this function will return. */ 1341 1342 if (status == EXIT_SUCCESS) 1343 { 1344 if (report_identical_files && !DIR_P (0)) 1345 message ("Files %s and %s are identical\n", 1346 file_label[0] ? file_label[0] : cmp.file[0].name, 1347 file_label[1] ? file_label[1] : cmp.file[1].name); 1348 } 1349 else 1350 { 1351 /* Flush stdout so that the user sees differences immediately. 1352 This can hurt performance, unfortunately. */ 1353 if (fflush (stdout) != 0) 1354 pfatal_with_name (_("standard output")); 1355 } 1356 1357 if (free0) 1358 free (free0); 1359 if (free1) 1360 free (free1); 1361 1362 return status; 1363} 1364