1/* diff - compare files line by line
2
3   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
4   Free Software Foundation, Inc.
5
6   This file is part of GNU DIFF.
7
8   GNU DIFF is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   GNU DIFF is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16   See the GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with GNU DIFF; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#define GDIFF_MAIN
24#include "diff.h"
25#include <c-stack.h>
26#include <dirname.h>
27#include <error.h>
28#include <exclude.h>
29#include <exitfail.h>
30#include <fnmatch.h>
31#include <freesoft.h>
32#include <getopt.h>
33#include <hard-locale.h>
34#include <prepargs.h>
35#include <quotesys.h>
36#include <regex.h>
37#include <setmode.h>
38#include <xalloc.h>
39
/* Author credits, printed (after translation) by the -v/--version
   option in main.  N_ only marks the string; main applies _() to it
   when printing.  */
static char const authorship_msgid[] =
  N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
Richard Stallman, and Len Tower.");

/* Copyright line printed verbatim by the -v/--version option.  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";
46
/* Minimum width of the gutter between the two half lines of
   side-by-side output; main uses it when computing sdiff_half_width
   and sdiff_column2_offset.  It may be predefined by the build.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
50
/* The regexps accumulated from repeated command-line options.
   add_regexp appends each pattern to `regexps', joining them with
   `\|'; summarize_regexp_list later compiles the joined text into
   `buf' when more than one pattern was given.  */
struct regexp_list
{
  char *regexps;	/* chars representing disjunction of the regexps */
  size_t len;		/* chars used in `regexps' */
  size_t size;		/* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* compiled pattern; main points this at
				    function_regexp or ignore_regexp */
};
59
/* Forward declarations of the file-local helpers defined after main.
   try_help is declared noreturn: it reports the usage error and exits.  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
68
69/* If comparing directories, compare their common subdirectories
70   recursively.  */
71static bool recursive;
72
73/* In context diffs, show previous lines that match these regexps.  */
74static struct regexp_list function_regexp_list;
75
76/* Ignore changes affecting only lines that match these regexps.  */
77static struct regexp_list ignore_regexp_list;
78
79#if HAVE_SETMODE_DOS
80/* Use binary I/O when reading and writing data (--binary).
81   On POSIX hosts, this has no effect.  */
82static bool binary;
83#endif
84
85/* When comparing directories, if a file appears only in one
86   directory, treat it as present but empty in the other (-N).
87   Then `patch' would create the file with appropriate contents.  */
88static bool new_file;
89
90/* When comparing directories, if a file appears only in the second
91   directory of the two, treat it as present but empty in the other
92   (--unidirectional-new-file).
93   Then `patch' would create the file with appropriate contents.  */
94static bool unidirectional_new_file;
95
96/* Report files compared that are the same (-s).
97   Normally nothing is output when that happens.  */
98static bool report_identical_files;
99
100
/* Build a single string holding the options diff was invoked with.

   OPTIONVEC is a vector of separate ARGV-elements and COUNT is its
   length.  Every element is written via quote_system_arg and is
   preceded by one space, so the result starts with a space, has no
   trailing space, and is the empty string when COUNT is zero.

   The result is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;		/* the terminating null byte */
  char *buffer;
  char *out;
  int k;

  /* Measuring pass: with a null destination only the return value of
     quote_system_arg is used; add one for each separating space.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  buffer = xmalloc (needed);
  out = buffer;

  /* Writing pass: a space, then the quoted element.  */
  k = 0;
  while (k < count)
    {
      *out++ = ' ';
      out += quote_system_arg (out, optionvec[k]);
      k++;
    }

  *out = 0;
  return buffer;
}
131
132
133/* Return an option value suitable for add_exclude.  */
134
135static int
136exclude_options (void)
137{
138  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
139}
140
/* Short option letters passed to getopt_long in main.  A letter
   followed by `:' takes an argument.  The digits are listed so that
   main can accumulate the obsolete `-NUM' context-length syntax.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
143
144/* Values for long options that do not have single-letter equivalents.  */
/* Option codes start above CHAR_MAX so that they cannot collide with
   any single-character option returned by getopt_long.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence.
     main subtracts UNCHANGED_LINE_FORMAT_OPTION from the option code
     and uses the difference to index line_format and
     line_format_option.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence.
     main subtracts UNCHANGED_GROUP_FORMAT_OPTION from the option code
     and uses the difference to index group_format and
     group_format_option.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
173
/* Names of the --*-group-format options, indexed by option code minus
   UNCHANGED_GROUP_FORMAT_OPTION.  main passes the name to
   specify_value for use in conflict diagnostics.  Each row is sized
   for the longest name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Names of the --*-line-format options, indexed by option code minus
   UNCHANGED_LINE_FORMAT_OPTION; used the same way as the table above.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
188
/* Long options accepted by getopt_long in main.  Each entry is
   {name, has_arg, flag, val}: has_arg is 0 for no argument, 1 for a
   required argument and 2 for an optional one.  flag is always 0
   here, so getopt_long returns val, which is either the equivalent
   short option letter or one of the *_OPTION codes.  The all-zero
   entry terminates the table.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};
246
247int
248main (int argc, char **argv)
249{
250  int exit_status = EXIT_SUCCESS;
251  int c;
252  int i;
253  int prev = -1;
254  lin ocontext = -1;
255  bool explicit_context = 0;
256  int width = 0;
257  bool show_c_function = 0;
258  char const *from_file = 0;
259  char const *to_file = 0;
260  uintmax_t numval;
261  char *numend;
262
263  /* Do our initializations.  */
264  exit_failure = 2;
265  initialize_main (&argc, &argv);
266  program_name = argv[0];
267  setlocale (LC_ALL, "");
268  bindtextdomain (PACKAGE, LOCALEDIR);
269  textdomain (PACKAGE);
270  c_stack_action (c_stack_die);
271  function_regexp_list.buf = &function_regexp;
272  ignore_regexp_list.buf = &ignore_regexp;
273  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
274  excluded = new_exclude ();
275
276  /* Decode the options.  */
277
278  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
279    {
280      switch (c)
281	{
282	case 0:
283	  break;
284
285	case '0':
286	case '1':
287	case '2':
288	case '3':
289	case '4':
290	case '5':
291	case '6':
292	case '7':
293	case '8':
294	case '9':
295	  if (! ISDIGIT (prev))
296	    ocontext = c - '0';
297	  else if (LIN_MAX / 10 < ocontext
298		   || ((ocontext = 10 * ocontext + c - '0') < 0))
299	    ocontext = LIN_MAX;
300	  break;
301
302	case 'a':
303	  text = 1;
304	  break;
305
306	case 'b':
307	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
308	    ignore_white_space = IGNORE_SPACE_CHANGE;
309	  break;
310
311	case 'B':
312	  ignore_blank_lines = 1;
313	  break;
314
315	case 'C':		/* +context[=lines] */
316	case 'U':		/* +unified[=lines] */
317	  {
318	    if (optarg)
319	      {
320		numval = strtoumax (optarg, &numend, 10);
321		if (*numend)
322		  try_help ("invalid context length `%s'", optarg);
323		if (LIN_MAX < numval)
324		  numval = LIN_MAX;
325	      }
326	    else
327	      numval = 3;
328
329	    specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
330	    if (context < numval)
331	      context = numval;
332	    explicit_context = 1;
333	  }
334	  break;
335
336	case 'c':
337	  specify_style (OUTPUT_CONTEXT);
338	  if (context < 3)
339	    context = 3;
340	  break;
341
342	case 'd':
343	  minimal = 1;
344	  break;
345
346	case 'D':
347	  specify_style (OUTPUT_IFDEF);
348	  {
349	    static char const C_ifdef_group_formats[] =
350	      "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
351	    char *b = xmalloc (sizeof C_ifdef_group_formats
352			       + 7 * strlen (optarg) - 14 /* 7*"%s" */
353			       - 8 /* 5*"%%" + 3*"%c" */);
354	    sprintf (b, C_ifdef_group_formats,
355		     0,
356		     optarg, optarg, 0,
357		     optarg, optarg, 0,
358		     optarg, optarg, optarg);
359	    for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
360	      {
361		specify_value (&group_format[i], b, "-D");
362		b += strlen (b) + 1;
363	      }
364	  }
365	  break;
366
367	case 'e':
368	  specify_style (OUTPUT_ED);
369	  break;
370
371	case 'E':
372	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
373	    ignore_white_space = IGNORE_TAB_EXPANSION;
374	  break;
375
376	case 'f':
377	  specify_style (OUTPUT_FORWARD_ED);
378	  break;
379
380	case 'F':
381	  add_regexp (&function_regexp_list, optarg);
382	  break;
383
384	case 'h':
385	  /* Split the files into chunks for faster processing.
386	     Usually does not change the result.
387
388	     This currently has no effect.  */
389	  break;
390
391	case 'H':
392	  speed_large_files = 1;
393	  break;
394
395	case 'i':
396	  ignore_case = 1;
397	  break;
398
399	case 'I':
400	  add_regexp (&ignore_regexp_list, optarg);
401	  break;
402
403	case 'l':
404	  if (!pr_program[0])
405	    try_help ("pagination not supported on this host", 0);
406	  paginate = 1;
407#ifdef SIGCHLD
408	  /* Pagination requires forking and waiting, and
409	     System V fork+wait does not work if SIGCHLD is ignored.  */
410	  signal (SIGCHLD, SIG_DFL);
411#endif
412	  break;
413
414	case 'L':
415	  if (!file_label[0])
416	    file_label[0] = optarg;
417	  else if (!file_label[1])
418	    file_label[1] = optarg;
419	  else
420	    fatal ("too many file label options");
421	  break;
422
423	case 'n':
424	  specify_style (OUTPUT_RCS);
425	  break;
426
427	case 'N':
428	  new_file = 1;
429	  break;
430
431	case 'p':
432	  show_c_function = 1;
433	  add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
434	  break;
435
436	case 'P':
437	  unidirectional_new_file = 1;
438	  break;
439
440	case 'q':
441	  brief = 1;
442	  break;
443
444	case 'r':
445	  recursive = 1;
446	  break;
447
448	case 's':
449	  report_identical_files = 1;
450	  break;
451
452	case 'S':
453	  specify_value (&starting_file, optarg, "-S");
454	  break;
455
456	case 't':
457	  expand_tabs = 1;
458	  break;
459
460	case 'T':
461	  initial_tab = 1;
462	  break;
463
464	case 'u':
465	  specify_style (OUTPUT_UNIFIED);
466	  if (context < 3)
467	    context = 3;
468	  break;
469
470	case 'v':
471	  printf ("diff %s\n%s\n\n%s\n\n%s\n",
472		  version_string, copyright_string,
473		  _(free_software_msgid), _(authorship_msgid));
474	  check_stdout ();
475	  return EXIT_SUCCESS;
476
477	case 'w':
478	  ignore_white_space = IGNORE_ALL_SPACE;
479	  break;
480
481	case 'x':
482	  add_exclude (excluded, optarg, exclude_options ());
483	  break;
484
485	case 'X':
486	  if (add_exclude_file (add_exclude, excluded, optarg,
487				exclude_options (), '\n'))
488	    pfatal_with_name (optarg);
489	  break;
490
491	case 'y':
492	  specify_style (OUTPUT_SDIFF);
493	  break;
494
495	case 'W':
496	  numval = strtoumax (optarg, &numend, 10);
497	  if (! (0 < numval && numval <= INT_MAX) || *numend)
498	    try_help ("invalid width `%s'", optarg);
499	  if (width != numval)
500	    {
501	      if (width)
502		fatal ("conflicting width options");
503	      width = numval;
504	    }
505	  break;
506
507	case BINARY_OPTION:
508#if HAVE_SETMODE_DOS
509	  binary = 1;
510	  set_binary_mode (STDOUT_FILENO, 1);
511#endif
512	  break;
513
514	case FROM_FILE_OPTION:
515	  specify_value (&from_file, optarg, "--from-file");
516	  break;
517
518	case HELP_OPTION:
519	  usage ();
520	  check_stdout ();
521	  return EXIT_SUCCESS;
522
523	case HORIZON_LINES_OPTION:
524	  numval = strtoumax (optarg, &numend, 10);
525	  if (*numend)
526	    try_help ("invalid horizon length `%s'", optarg);
527	  horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
528	  break;
529
530	case IGNORE_FILE_NAME_CASE_OPTION:
531	  ignore_file_name_case = 1;
532	  break;
533
534	case INHIBIT_HUNK_MERGE_OPTION:
535	  /* This option is obsolete, but accept it for backward
536             compatibility.  */
537	  break;
538
539	case LEFT_COLUMN_OPTION:
540	  left_column = 1;
541	  break;
542
543	case LINE_FORMAT_OPTION:
544	  specify_style (OUTPUT_IFDEF);
545	  for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
546	    specify_value (&line_format[i], optarg, "--line-format");
547	  break;
548
549	case NO_IGNORE_FILE_NAME_CASE_OPTION:
550	  ignore_file_name_case = 0;
551	  break;
552
553	case NORMAL_OPTION:
554	  specify_style (OUTPUT_NORMAL);
555	  break;
556
557	case SDIFF_MERGE_ASSIST_OPTION:
558	  specify_style (OUTPUT_SDIFF);
559	  sdiff_merge_assist = 1;
560	  break;
561
562	case STRIP_TRAILING_CR_OPTION:
563	  strip_trailing_cr = 1;
564	  break;
565
566	case SUPPRESS_COMMON_LINES_OPTION:
567	  suppress_common_lines = 1;
568	  break;
569
570	case TO_FILE_OPTION:
571	  specify_value (&to_file, optarg, "--to-file");
572	  break;
573
574	case UNCHANGED_LINE_FORMAT_OPTION:
575	case OLD_LINE_FORMAT_OPTION:
576	case NEW_LINE_FORMAT_OPTION:
577	  specify_style (OUTPUT_IFDEF);
578	  c -= UNCHANGED_LINE_FORMAT_OPTION;
579	  specify_value (&line_format[c], optarg, line_format_option[c]);
580	  break;
581
582	case UNCHANGED_GROUP_FORMAT_OPTION:
583	case OLD_GROUP_FORMAT_OPTION:
584	case NEW_GROUP_FORMAT_OPTION:
585	case CHANGED_GROUP_FORMAT_OPTION:
586	  specify_style (OUTPUT_IFDEF);
587	  c -= UNCHANGED_GROUP_FORMAT_OPTION;
588	  specify_value (&group_format[c], optarg, group_format_option[c]);
589	  break;
590
591	default:
592	  try_help (0, 0);
593	}
594      prev = c;
595    }
596
597  if (output_style == OUTPUT_UNSPECIFIED)
598    {
599      if (show_c_function)
600	{
601	  specify_style (OUTPUT_CONTEXT);
602	  if (ocontext < 0)
603	    context = 3;
604	}
605      else
606	specify_style (OUTPUT_NORMAL);
607    }
608
609  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
610    time_format = "%Y-%m-%d %H:%M:%S.%N %z";
611  else
612    {
613      /* See POSIX 1003.1-2001 for this format.  */
614      time_format = "%a %b %e %T %Y";
615    }
616
617  if (0 <= ocontext)
618    {
619      bool modern_usage = 200112 <= posix2_version ();
620
621      if ((output_style == OUTPUT_CONTEXT
622	   || output_style == OUTPUT_UNIFIED)
623	  && (context < ocontext
624	      || (ocontext < context && ! explicit_context)))
625	{
626	  if (modern_usage)
627	    {
628	      error (0, 0,
629		     _("`-%ld' option is obsolete; use `-%c %ld'"),
630		     (long) ocontext,
631		     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
632		     (long) ocontext);
633	      try_help (0, 0);
634	    }
635	  context = ocontext;
636	}
637      else
638	{
639	  if (modern_usage)
640	    {
641	      error (0, 0, _("`-%ld' option is obsolete; omit it"),
642		     (long) ocontext);
643	      try_help (0, 0);
644	    }
645	}
646    }
647
648  {
649    /*
650     *	We maximize first the half line width, and then the gutter width,
651     *	according to the following constraints:
652     *	1.  Two half lines plus a gutter must fit in a line.
653     *	2.  If the half line width is nonzero:
654     *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
655     *	    b.  If tabs are not expanded to spaces,
656     *		a half line plus a gutter is an integral number of tabs,
657     *		so that tabs in the right column line up.
658     */
659    unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
660    int w = width ? width : 130;
661    int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t)  *  t;
662    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
663    sdiff_column2_offset = sdiff_half_width ? off : w;
664  }
665
666  /* Make the horizon at least as large as the context, so that
667     shift_boundaries has more freedom to shift the first and last hunks.  */
668  if (horizon_lines < context)
669    horizon_lines = context;
670
671  summarize_regexp_list (&function_regexp_list);
672  summarize_regexp_list (&ignore_regexp_list);
673
674  if (output_style == OUTPUT_IFDEF)
675    {
676      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
677	if (!line_format[i])
678	  line_format[i] = "%l\n";
679      if (!group_format[OLD])
680	group_format[OLD]
681	  = group_format[CHANGED] ? group_format[CHANGED] : "%<";
682      if (!group_format[NEW])
683	group_format[NEW]
684	  = group_format[CHANGED] ? group_format[CHANGED] : "%>";
685      if (!group_format[UNCHANGED])
686	group_format[UNCHANGED] = "%=";
687      if (!group_format[CHANGED])
688	group_format[CHANGED] = concat (group_format[OLD],
689					group_format[NEW], "");
690    }
691
692  no_diff_means_no_output =
693    (output_style == OUTPUT_IFDEF ?
694      (!*group_format[UNCHANGED]
695       || (strcmp (group_format[UNCHANGED], "%=") == 0
696	   && !*line_format[UNCHANGED]))
697     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
698
699  files_can_be_treated_as_binary =
700    (brief
701     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
702	  | (ignore_regexp_list.regexps || ignore_white_space)));
703
704  switch_string = option_list (argv + 1, optind - 1);
705
706  if (from_file)
707    {
708      if (to_file)
709	fatal ("--from-file and --to-file both specified");
710      else
711	for (; optind < argc; optind++)
712	  {
713	    int status = compare_files ((struct comparison *) 0,
714					from_file, argv[optind]);
715	    if (exit_status < status)
716	      exit_status = status;
717	  }
718    }
719  else
720    {
721      if (to_file)
722	for (; optind < argc; optind++)
723	  {
724	    int status = compare_files ((struct comparison *) 0,
725					argv[optind], to_file);
726	    if (exit_status < status)
727	      exit_status = status;
728	  }
729      else
730	{
731	  if (argc - optind != 2)
732	    {
733	      if (argc - optind < 2)
734		try_help ("missing operand after `%s'", argv[argc - 1]);
735	      else
736		try_help ("extra operand `%s'", argv[optind + 2]);
737	    }
738
739	  exit_status = compare_files ((struct comparison *) 0,
740				       argv[optind], argv[optind + 1]);
741	}
742    }
743
744  /* Print any messages that were saved up for last.  */
745  print_message_queue ();
746
747  check_stdout ();
748  exit (exit_status);
749  return exit_status;
750}
751
752/* Append to REGLIST the regexp PATTERN.  */
753
754static void
755add_regexp (struct regexp_list *reglist, char const *pattern)
756{
757  size_t patlen = strlen (pattern);
758  char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
759
760  if (m != 0)
761    error (0, 0, "%s: %s", pattern, m);
762  else
763    {
764      char *regexps = reglist->regexps;
765      size_t len = reglist->len;
766      bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
767      size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
768      size_t size = reglist->size;
769
770      if (size <= newlen)
771	{
772	  if (!size)
773	    size = 1;
774
775	  do size *= 2;
776	  while (size <= newlen);
777
778	  reglist->size = size;
779	  reglist->regexps = regexps = xrealloc (regexps, size);
780	}
781      if (multiple_regexps)
782	{
783	  regexps[len++] = '\\';
784	  regexps[len++] = '|';
785	}
786      memcpy (regexps + len, pattern, patlen + 1);
787    }
788}
789
790/* Ensure that REGLIST represents the disjunction of its regexps.
791   This is done here, rather than earlier, to avoid O(N^2) behavior.  */
792
793static void
794summarize_regexp_list (struct regexp_list *reglist)
795{
796  if (reglist->regexps)
797    {
798      /* At least one regexp was specified.  Allocate a fastmap for it.  */
799      reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
800      if (reglist->multiple_regexps)
801	{
802	  /* Compile the disjunction of the regexps.
803	     (If just one regexp was specified, it is already compiled.)  */
804	  char const *m = re_compile_pattern (reglist->regexps, reglist->len,
805					      reglist->buf);
806	  if (m != 0)
807	    error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
808	}
809    }
810}
811
812static void
813try_help (char const *reason_msgid, char const *operand)
814{
815  if (reason_msgid)
816    error (0, 0, _(reason_msgid), operand);
817  error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
818	 program_name);
819  abort ();
820}
821
/* Diagnose output problems: call fatal if standard output has its
   error indicator set; otherwise close it and report a failure to
   close through pfatal_with_name.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
830
/* Help text printed by usage, one message per element.  An empty
   string makes usage print a blank line, and the null pointer ends the
   table.  Messages are marked with N_ here and translated by usage at
   print time.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  N_("-i  --ignore-case  Ignore case differences in file contents."),
  N_("--ignore-file-name-case  Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case  Consider case when comparing file names."),
  N_("-E  --ignore-tab-expansion  Ignore changes due to tab expansion."),
  N_("-b  --ignore-space-change  Ignore changes in the amount of white space."),
  N_("-w  --ignore-all-space  Ignore all white space."),
  N_("-B  --ignore-blank-lines  Ignore changes whose lines are all blank."),
  N_("-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr  Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary  Read and write data in binary mode."),
#endif
  N_("-a  --text  Treat all files as text."),
  "",
  N_("-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.\n\
-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.\n\
  --label LABEL  Use LABEL instead of file name.\n\
  -p  --show-c-function  Show which C function each change is in.\n\
  -F RE  --show-function-line=RE  Show the most recent line matching RE."),
  N_("-q  --brief  Output only whether files differ."),
  N_("-e  --ed  Output an ed script."),
  N_("--normal  Output a normal diff."),
  N_("-n  --rcs  Output an RCS format diff."),
  N_("-y  --side-by-side  Output in two columns.\n\
  -W NUM  --width=NUM  Output at most NUM (default 130) print columns.\n\
  --left-column  Output only the left column of common lines.\n\
  --suppress-common-lines  Do not output common lines."),
  N_("-D NAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT  Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT."),
  N_("  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'."),
  N_("  GFMT may contain:\n\
    %<  lines from FILE1\n\
    %>  lines from FILE2\n\
    %=  lines common to FILE1 and FILE2\n\
    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
      LETTERs are as follows for new group, lower case for old group:\n\
        F  first line number\n\
        L  last line number\n\
        N  number of lines = L-F+1\n\
        E  F-1\n\
        M  L+1"),
  N_("  LFMT may contain:\n\
    %L  contents of line\n\
    %l  contents of line, excluding any trailing newline\n\
    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number"),
  N_("  Either GFMT or LFMT may contain:\n\
    %%  %\n\
    %c'C'  the single character C\n\
    %c'\\OOO'  the character with octal code OOO"),
  "",
  N_("-l  --paginate  Pass the output through `pr' to paginate it."),
  N_("-t  --expand-tabs  Expand tabs to spaces in output."),
  N_("-T  --initial-tab  Make tabs line up by prepending a tab."),
  "",
  N_("-r  --recursive  Recursively compare any subdirectories found."),
  N_("-N  --new-file  Treat absent files as empty."),
  N_("--unidirectional-new-file  Treat absent first files as empty."),
  N_("-s  --report-identical-files  Report when two files are the same."),
  N_("-x PAT  --exclude=PAT  Exclude files that match PAT."),
  N_("-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE."),
  N_("-S FILE  --starting-file=FILE  Start with FILE when comparing directories."),
  N_("--from-file=FILE1  Compare FILE1 to all operands.  FILE1 can be a directory."),
  N_("--to-file=FILE2  Compare all operands to FILE2.  FILE2 can be a directory."),
  "",
  N_("--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix."),
  N_("-d  --minimal  Try hard to find a smaller set of changes."),
  N_("--speed-large-files  Assume large files and many scattered small changes."),
  "",
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  "",
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
914
915static void
916usage (void)
917{
918  char const * const *p;
919
920  printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
921
922  for (p = option_help_msgid;  *p;  p++)
923    {
924      if (!**p)
925	putchar ('\n');
926      else
927	{
928	  char const *msg = _(*p);
929	  char const *nl;
930	  while ((nl = strchr (msg, '\n')))
931	    {
932	      int msglen = nl + 1 - msg;
933	      printf ("  %.*s", msglen, msg);
934	      msg = nl + 1;
935	    }
936
937	  printf ("  %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
938	}
939    }
940}
941
/* Store VALUE in *VAR.  If *VAR already holds a different string,
   report a conflict that names OPTION and VALUE, then give up via
   try_help.  Giving the same value twice is accepted.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;
  bool conflict = previous != 0 && strcmp (previous, value) != 0;

  if (conflict)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
954
955/* Set the output style to STYLE, diagnosing conflicts.  */
956static void
957specify_style (enum output_style style)
958{
959  if (output_style != style)
960    {
961      if (output_style != OUTPUT_UNSPECIFIED)
962	try_help ("conflicting output style options", 0);
963      output_style = style;
964    }
965}
966
/* Return a translated description of the kind of file that *ST
   describes.  The formats are those of POSIX 1003.1-2001.

   To keep diagnostics grammatical in English, every returned string
   must start with a consonant.  */
static char const *
filetype (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    {
      if (st->st_size == 0)
	return _("regular empty file");
      return _("regular file");
    }

  if (S_ISDIR (st->st_mode))
    return _("directory");

#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return _("block special file");
#endif

#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return _("character special file");
#endif

#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return _("fifo");
#endif

  /* S_ISLNK is impossible with `fstat' and `stat'.  */

#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return _("socket");
#endif

#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return _("message queue");
#endif

#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return _("semaphore");
#endif

#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return _("shared memory object");
#endif

#ifdef S_TYPEISTMO
  if (S_TYPEISTMO (st))
    return _("typed memory object");
#endif

  /* None of the known types matched.  */
  return _("weird file");
}
1008
/* Set the last-modified time of *ST to be the current time.

   When the stat structure has a nanosecond field (ST_MTIM_NSEC is
   defined), try clock_gettime and then gettimeofday, whichever the
   build enables.  If neither is available or both fail, fall back to
   time and clear the nanosecond field, so that the result is not the
   current second paired with a stale sub-second part.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval timeval;
    if (gettimeofday (&timeval, NULL) == 0)
      {
	st->st_mtime = timeval.tv_sec;
	/* Convert microseconds to nanoseconds.  */
	st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000;
	return;
      }
  }
# endif

  /* Only whole seconds are available from here on.  */
  st->st_mtim.ST_MTIM_NSEC = 0;

#endif /* ST_MTIM_NSEC */

  time (&st->st_mtime);
}
1037
/* Compare two files (or dirs) with parent comparison PARENT
   and names NAME0 and NAME1.
   (If PARENT is 0, then the first name is just NAME0, etc.)
   This is self-contained; it opens the files and closes them.

   NAME0 or NAME1 may be 0 for a file that is missing on that side;
   the other name then stands in for it.  When PARENT is nonzero the
   full names are built with dir_file_pathname from the parent's
   names, and are freed again before returning.

   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
   different, EXIT_TROUBLE if there is a problem opening them.  */

static int
compare_files (struct comparison const *parent,
	       char const *name0,
	       char const *name1)
{
  struct comparison cmp;
  /* Nonzero if side F of CMP is a directory according to its stat mode.  */
#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
  register int f;
  int status = EXIT_SUCCESS;
  /* Assigned while testing an `else if' condition further down; it is
     only read on a path that has already evaluated that condition.  */
  bool same_files;
  /* File names allocated by this function, or 0; freed on the way out.  */
  char *free0, *free1;

  /* If this is directory comparison, perhaps we have a file
     that exists only in one of the directories.
     If so, just print a message to that effect.  */

  if (! ((name0 && name1)
	 || (unidirectional_new_file && name1)
	 || new_file))
    {
      /* NAME is whichever name was supplied; DIR is the parent's name
	 on that same side (index 1 when NAME0 is missing).  */
      char const *name = name0 == 0 ? name1 : name0;
      char const *dir = parent->file[name0 == 0].name;

      /* See POSIX 1003.1-2001 for this format.  */
      message ("Only in %s: %s\n", dir, name);

      /* Return EXIT_FAILURE so that diff_dirs will return
	 EXIT_FAILURE ("some files differ").  */
      return EXIT_FAILURE;
    }

  memset (cmp.file, 0, sizeof cmp.file);
  cmp.parent = parent;

  /* cmp.file[f].desc markers */
#define NONEXISTENT (-1) /* nonexistent file */
#define UNOPENED (-2) /* unopened file (e.g. directory) */
#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */

#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */

  /* This must be done before the missing name is filled in below,
     since afterwards neither name is 0.  */
  cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
  cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;

  /* Now record the full name of each file, including nonexistent ones.  */

  if (name0 == 0)
    name0 = name1;
  if (name1 == 0)
    name1 = name0;

  if (!parent)
    {
      /* The caller's strings are used directly; there is nothing to free.  */
      free0 = 0;
      free1 = 0;
      cmp.file[0].name = name0;
      cmp.file[1].name = name1;
    }
  else
    {
      cmp.file[0].name = free0
	= dir_file_pathname (parent->file[0].name, name0);
      cmp.file[1].name = free1
	= dir_file_pathname (parent->file[1].name, name1);
    }

  /* Stat the files.  */

  for (f = 0; f < 2; f++)
    {
      if (cmp.file[f].desc != NONEXISTENT)
	{
	  /* Side 1 has the same name as side 0: reuse side 0's
	     descriptor marker and stat data instead of calling stat
	     a second time.  */
	  if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
	    {
	      cmp.file[f].desc = cmp.file[0].desc;
	      cmp.file[f].stat = cmp.file[0].stat;
	    }
	  /* The name "-" selects standard input.  */
	  else if (strcmp (cmp.file[f].name, "-") == 0)
	    {
	      cmp.file[f].desc = STDIN_FILENO;
	      if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
		cmp.file[f].desc = ERRNO_ENCODE (errno);
	      else
		{
		  if (S_ISREG (cmp.file[f].stat.st_mode))
		    {
		      /* Reduce the recorded size by standard input's
			 current offset, clamping the result at zero.  */
		      off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
		      if (pos < 0)
			cmp.file[f].desc = ERRNO_ENCODE (errno);
		      else
			cmp.file[f].stat.st_size =
			  MAX (0, cmp.file[f].stat.st_size - pos);
		    }

		  /* POSIX 1003.1-2001 requires current time for
		     stdin.  */
		  set_mtime_to_now (&cmp.file[f].stat);
		}
	    }
	  /* Any other name: remember the errno of a failed stat in the
	     descriptor slot; it is reported further down.  */
	  else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
	    cmp.file[f].desc = ERRNO_ENCODE (errno);
	}
    }

  /* Mark files as nonexistent at the top level as needed for -N and
     --unidirectional-new-file.  */
  if (! parent)
    {
      if ((new_file | unidirectional_new_file)
	  && cmp.file[0].desc == ERRNO_ENCODE (ENOENT)
	  && cmp.file[1].desc == UNOPENED)
	cmp.file[0].desc = NONEXISTENT;

      if (new_file
	  && cmp.file[0].desc == UNOPENED
	  && cmp.file[1].desc == ERRNO_ENCODE (ENOENT))
	cmp.file[1].desc = NONEXISTENT;
    }

  /* Give a nonexistent side the mode of the other side, so that
     DIR_P and S_ISREG answer the same for both sides.  */
  for (f = 0; f < 2; f++)
    if (cmp.file[f].desc == NONEXISTENT)
      cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;

  /* Report every stat, fstat or lseek failure recorded above.
     ERRNO_DECODE is nonnegative only for values made by ERRNO_ENCODE;
     NONEXISTENT, UNOPENED and real descriptors decode to negative
     numbers.  */
  for (f = 0; f < 2; f++)
    {
      int e = ERRNO_DECODE (cmp.file[f].desc);
      if (0 <= e)
	{
	  errno = e;
	  perror_with_name (cmp.file[f].name);
	  status = EXIT_TROUBLE;
	}
    }

  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
    {
      /* If one is a directory, and it was specified in the command line,
	 use the file in that dir with the other file's basename.  */

      /* Exactly one side is a directory here.  FNM_ARG indexes the
	 side that is not a directory, DIR_ARG the side that is.  */
      int fnm_arg = DIR_P (0);
      int dir_arg = 1 - fnm_arg;
      char const *fnm = cmp.file[fnm_arg].name;
      char const *dir = cmp.file[dir_arg].name;
      /* PARENT is 0 on this path, so free0 was set to 0 above and now
	 takes ownership of the newly built name.  */
      char const *filename = cmp.file[dir_arg].name = free0
	= dir_file_pathname (dir, base_name (fnm));

      if (strcmp (fnm, "-") == 0)
	fatal ("cannot compare `-' to a directory");

      /* Replace the directory's stat data with that of the file
	 inside it.  */
      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
	{
	  perror_with_name (filename);
	  status = EXIT_TROUBLE;
	}
    }

  if (status != EXIT_SUCCESS)
    {
      /* One of the files should exist but does not.  */
    }
  /* Note that same_files is assigned inside this condition.  */
  else if ((same_files
	    = (cmp.file[0].desc != NONEXISTENT
	       && cmp.file[1].desc != NONEXISTENT
	       && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
	       && same_file_attributes (&cmp.file[0].stat,
					&cmp.file[1].stat)))
	   && no_diff_means_no_output)
    {
      /* The two named files are actually the same physical file.
	 We know they are identical without actually reading them.  */
    }
  else if (DIR_P (0) & DIR_P (1))
    {
      if (output_style == OUTPUT_IFDEF)
	fatal ("-D option not supported with directories");

      /* If both are directories, compare the files in them.  */

      if (parent && !recursive)
	{
	  /* But don't compare dir contents one level down
	     unless -r was specified.
	     See POSIX 1003.1-2001 for this format.  */
	  message ("Common subdirectories: %s and %s\n",
		   cmp.file[0].name, cmp.file[1].name);
	}
      else
	status = diff_dirs (&cmp, compare_files);
    }
  /* Exactly one side is a directory, or, below the top level, at
     least one side is not a regular file.  */
  else if ((DIR_P (0) | DIR_P (1))
	   || (parent
	       && (! S_ISREG (cmp.file[0].stat.st_mode)
		   || ! S_ISREG (cmp.file[1].stat.st_mode))))
    {
      if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
	{
	  /* We have a subdirectory that exists only in one directory.  */

	  if ((DIR_P (0) | DIR_P (1))
	      && recursive
	      && (new_file
		  || (unidirectional_new_file
		      && cmp.file[0].desc == NONEXISTENT)))
	    status = diff_dirs (&cmp, compare_files);
	  else
	    {
	      /* Name the parent directory on the side where the entry
		 exists: index 1 when side 0 is the nonexistent one.  */
	      char const *dir
		= parent->file[cmp.file[0].desc == NONEXISTENT].name;

	      /* See POSIX 1003.1-2001 for this format.  */
	      message ("Only in %s: %s\n", dir, name0);

	      status = EXIT_FAILURE;
	    }
	}
      else
	{
	  /* We have two files that are not to be compared.  */

	  /* See POSIX 1003.1-2001 for this format.  */
	  message5 ("File %s is a %s while file %s is a %s\n",
		    file_label[0] ? file_label[0] : cmp.file[0].name,
		    filetype (&cmp.file[0].stat),
		    file_label[1] ? file_label[1] : cmp.file[1].name,
		    filetype (&cmp.file[1].stat));

	  /* This is a difference.  */
	  status = EXIT_FAILURE;
	}
    }
  /* The recorded sizes differ and each side is either nonexistent or
     a regular file: report a difference without opening anything.  */
  else if (files_can_be_treated_as_binary
	   && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
	   && (cmp.file[0].desc == NONEXISTENT
	       || S_ISREG (cmp.file[0].stat.st_mode))
	   && (cmp.file[1].desc == NONEXISTENT
	       || S_ISREG (cmp.file[1].stat.st_mode)))
    {
      message ("Files %s and %s differ\n",
	       file_label[0] ? file_label[0] : cmp.file[0].name,
	       file_label[1] ? file_label[1] : cmp.file[1].name);
      status = EXIT_FAILURE;
    }
  else
    {
      /* Both exist and neither is a directory.  */

      /* Open the files and record their descriptors.  */

      if (cmp.file[0].desc == UNOPENED)
	if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
	  {
	    perror_with_name (cmp.file[0].name);
	    status = EXIT_TROUBLE;
	  }
      if (cmp.file[1].desc == UNOPENED)
	{
	  /* Share side 0's descriptor rather than opening the same
	     file a second time.  */
	  if (same_files)
	    cmp.file[1].desc = cmp.file[0].desc;
	  else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
		   < 0)
	    {
	      perror_with_name (cmp.file[1].name);
	      status = EXIT_TROUBLE;
	    }
	}

#if HAVE_SETMODE_DOS
      if (binary)
	for (f = 0; f < 2; f++)
	  if (0 <= cmp.file[f].desc)
	    set_binary_mode (cmp.file[f].desc, 1);
#endif

      /* Compare the files, if no error was found.  */

      if (status == EXIT_SUCCESS)
	status = diff_2_files (&cmp);

      /* Close the file descriptors.  */

      if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
	{
	  perror_with_name (cmp.file[0].name);
	  status = EXIT_TROUBLE;
	}
      /* Skip the second close when both sides share one descriptor.  */
      if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
	  && close (cmp.file[1].desc) != 0)
	{
	  perror_with_name (cmp.file[1].name);
	  status = EXIT_TROUBLE;
	}
    }

  /* Now the comparison has been done, if no error prevented it,
     and STATUS is the value this function will return.  */

  if (status == EXIT_SUCCESS)
    {
      if (report_identical_files && !DIR_P (0))
	message ("Files %s and %s are identical\n",
		 file_label[0] ? file_label[0] : cmp.file[0].name,
		 file_label[1] ? file_label[1] : cmp.file[1].name);
    }
  else
    {
      /* Flush stdout so that the user sees differences immediately.
	 This can hurt performance, unfortunately.  */
      if (fflush (stdout) != 0)
	pfatal_with_name (_("standard output"));
    }

  /* Release any names allocated above.  */
  if (free0)
    free (free0);
  if (free1)
    free (free1);

  return status;
}
1364