1/* diff - compare files line by line
2
3   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002,
4   2004 Free Software Foundation, Inc.
5
6   This file is part of GNU DIFF.
7
8   GNU DIFF is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   GNU DIFF is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16   See the GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with GNU DIFF; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#define GDIFF_MAIN
24#include "diff.h"
25#include "paths.h"
26#include <c-stack.h>
27#include <dirname.h>
28#include <error.h>
29#include <exclude.h>
30#include <exit.h>
31#include <exitfail.h>
32#include <file-type.h>
33#include <fnmatch.h>
34#include <getopt.h>
35#include <hard-locale.h>
36#include <posixver.h>
37#include <prepargs.h>
38#include <quotesys.h>
39#include <setmode.h>
40#include <version-etc.h>
41#include <xalloc.h>
42
/* Smallest gutter width that the side-by-side layout computation in
   main allows between two half lines of nonzero width.  The #ifndef
   lets a build override the value.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
46
/* A growing collection of regexps given on the command line, together
   with the pattern buffer that receives their compiled form.  */
struct regexp_list
{
  /* Text of the regexps seen so far, joined into one disjunction.  */
  char *regexps;

  /* Number of chars of `regexps' currently in use.  */
  size_t len;

  /* Size malloc'ed for `regexps'; 0 if nothing has been malloc'ed.  */
  size_t size;

  /* True if `regexps' holds a disjunction of several regexps.  */
  bool multiple_regexps;

  /* Pattern buffer the regexps are compiled into.  */
  struct re_pattern_buffer *buf;
};
55
/* Forward declarations of the file-local functions defined below.  */
static int compare_files (struct comparison const *parent,
			  char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
			   char const *option);
static void try_help (char const *reason_msgid, char const *operand)
  __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
64
65/* If comparing directories, compare their common subdirectories
66   recursively.  */
67static bool recursive;
68
69/* In context diffs, show previous lines that match these regexps.  */
70static struct regexp_list function_regexp_list;
71
72/* Ignore changes affecting only lines that match these regexps.  */
73static struct regexp_list ignore_regexp_list;
74
75#if HAVE_SETMODE_DOS
76/* Use binary I/O when reading and writing data (--binary).
77   On POSIX hosts, this has no effect.  */
78static bool binary;
79#else
80enum { binary = true };
81#endif
82
83/* When comparing directories, if a file appears only in one
84   directory, treat it as present but empty in the other (-N).
85   Then `patch' would create the file with appropriate contents.  */
86static bool new_file;
87
88/* When comparing directories, if a file appears only in the second
89   directory of the two, treat it as present but empty in the other
90   (--unidirectional-new-file).
91   Then `patch' would create the file with appropriate contents.  */
92static bool unidirectional_new_file;
93
94/* Report files compared that are the same (-s).
95   Normally nothing is output when that happens.  */
96static bool report_identical_files;
97
98
/* Build one string holding the command options diff was invoked with.

   OPTIONVEC is a vector of COUNT separate ARGV elements.  Each element
   is quoted with quote_system_arg and preceded by a space, so the
   result starts with a space, has none at the end, and is the empty
   string when COUNT is zero.  The result is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;	/* the terminating null */
  char *list;
  char *tail;
  int k;

  /* Sizing pass: with a null destination, the value returned by
     quote_system_arg is used as the space the quoted element needs.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  list = xmalloc (needed);
  tail = list;

  /* Writing pass: emit a space followed by the quoted element.  */
  for (k = 0; k < count; k++)
    {
      *tail = ' ';
      tail++;
      tail += quote_system_arg (tail, optionvec[k]);
    }

  *tail = 0;
  return list;
}
129
130
131/* Return an option value suitable for add_exclude.  */
132
133static int
134exclude_options (void)
135{
136  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
137}
138
/* Short option string passed to getopt_long.  The digits come first;
   main accumulates them into the obsolete -NUM context length.  */
static char const shortopts[] =
  "0123456789"
  "abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
141
/* Codes for long options that have no single-letter equivalent.
   They start at CHAR_MAX + 1, above every `char' value.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* The line-format codes must be consecutive: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION from them to get an array index.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION = UNCHANGED_LINE_FORMAT_OPTION + 1,
  NEW_LINE_FORMAT_OPTION = UNCHANGED_LINE_FORMAT_OPTION + 2,

  /* Likewise the group-format codes, relative to
     UNCHANGED_GROUP_FORMAT_OPTION.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION = UNCHANGED_GROUP_FORMAT_OPTION + 1,
  NEW_GROUP_FORMAT_OPTION = UNCHANGED_GROUP_FORMAT_OPTION + 2,
  CHANGED_GROUP_FORMAT_OPTION = UNCHANGED_GROUP_FORMAT_OPTION + 3
};
172
/* Names of the group-format options, in the order of their option
   codes; main uses them when reporting conflicting values.  Each row
   is wide enough for the longest name.  */
static char const
group_format_option[4][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };
180
/* Names of the line-format options, in the order of their option
   codes; main uses them when reporting conflicting values.  Each row
   is wide enough for the longest name.  */
static char const
line_format_option[3][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
187
188static struct option const longopts[] =
189{
190  {"binary", 0, 0, BINARY_OPTION},
191  {"brief", 0, 0, 'q'},
192  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
193  {"context", 2, 0, 'C'},
194  {"ed", 0, 0, 'e'},
195  {"exclude", 1, 0, 'x'},
196  {"exclude-from", 1, 0, 'X'},
197  {"expand-tabs", 0, 0, 't'},
198  {"forward-ed", 0, 0, 'f'},
199  {"from-file", 1, 0, FROM_FILE_OPTION},
200  {"help", 0, 0, HELP_OPTION},
201  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
202  {"ifdef", 1, 0, 'D'},
203  {"ignore-all-space", 0, 0, 'w'},
204  {"ignore-blank-lines", 0, 0, 'B'},
205  {"ignore-case", 0, 0, 'i'},
206  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
207  {"ignore-matching-lines", 1, 0, 'I'},
208  {"ignore-space-change", 0, 0, 'b'},
209  {"ignore-tab-expansion", 0, 0, 'E'},
210  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
211  {"initial-tab", 0, 0, 'T'},
212  {"label", 1, 0, 'L'},
213  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
214  {"line-format", 1, 0, LINE_FORMAT_OPTION},
215  {"minimal", 0, 0, 'd'},
216  {"new-file", 0, 0, 'N'},
217  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
218  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
219  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
220  {"normal", 0, 0, NORMAL_OPTION},
221  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
222  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
223  {"paginate", 0, 0, 'l'},
224  {"rcs", 0, 0, 'n'},
225  {"recursive", 0, 0, 'r'},
226  {"report-identical-files", 0, 0, 's'},
227  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
228  {"show-c-function", 0, 0, 'p'},
229  {"show-function-line", 1, 0, 'F'},
230  {"side-by-side", 0, 0, 'y'},
231  {"speed-large-files", 0, 0, 'H'},
232  {"starting-file", 1, 0, 'S'},
233  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
234  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
235  {"tabsize", 1, 0, TABSIZE_OPTION},
236  {"text", 0, 0, 'a'},
237  {"to-file", 1, 0, TO_FILE_OPTION},
238  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
239  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
240  {"unidirectional-new-file", 0, 0, 'P'},
241  {"unified", 2, 0, 'U'},
242  {"version", 0, 0, 'v'},
243  {"width", 1, 0, 'W'},
244  {0, 0, 0, 0}
245};
246
247int
248main (int argc, char **argv)
249{
250  int exit_status = EXIT_SUCCESS;
251  int c;
252  int i;
253  int prev = -1;
254  lin ocontext = -1;
255  bool explicit_context = false;
256  size_t width = 0;
257  bool show_c_function = false;
258  char const *from_file = 0;
259  char const *to_file = 0;
260  uintmax_t numval;
261  char *numend;
262
263  /* Do our initializations.  */
264  exit_failure = 2;
265  initialize_main (&argc, &argv);
266  program_name = argv[0];
267  setlocale (LC_ALL, "");
268  bindtextdomain (PACKAGE, LOCALEDIR);
269  textdomain (PACKAGE);
270  c_stack_action (0);
271  function_regexp_list.buf = &function_regexp;
272  ignore_regexp_list.buf = &ignore_regexp;
273  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
274  excluded = new_exclude ();
275
276  /* Decode the options.  */
277
278  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
279    {
280      switch (c)
281	{
282	case 0:
283	  break;
284
285	case '0':
286	case '1':
287	case '2':
288	case '3':
289	case '4':
290	case '5':
291	case '6':
292	case '7':
293	case '8':
294	case '9':
295	  if (! ISDIGIT (prev))
296	    ocontext = c - '0';
297	  else if (LIN_MAX / 10 < ocontext
298		   || ((ocontext = 10 * ocontext + c - '0') < 0))
299	    ocontext = LIN_MAX;
300	  break;
301
302	case 'a':
303	  text = true;
304	  break;
305
306	case 'b':
307	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
308	    ignore_white_space = IGNORE_SPACE_CHANGE;
309	  break;
310
311	case 'B':
312	  ignore_blank_lines = true;
313	  break;
314
315	case 'C':
316	case 'U':
317	  {
318	    if (optarg)
319	      {
320		numval = strtoumax (optarg, &numend, 10);
321		if (*numend)
322		  try_help ("invalid context length `%s'", optarg);
323		if (LIN_MAX < numval)
324		  numval = LIN_MAX;
325	      }
326	    else
327	      numval = 3;
328
329	    specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
330	    if (context < numval)
331	      context = numval;
332	    explicit_context = true;
333	  }
334	  break;
335
336	case 'c':
337	  specify_style (OUTPUT_CONTEXT);
338	  if (context < 3)
339	    context = 3;
340	  break;
341
342	case 'd':
343	  minimal = true;
344	  break;
345
346	case 'D':
347	  specify_style (OUTPUT_IFDEF);
348	  {
349	    static char const C_ifdef_group_formats[] =
350	      "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
351	    char *b = xmalloc (sizeof C_ifdef_group_formats
352			       + 7 * strlen (optarg) - 14 /* 7*"%s" */
353			       - 8 /* 5*"%%" + 3*"%c" */);
354	    sprintf (b, C_ifdef_group_formats,
355		     0,
356		     optarg, optarg, 0,
357		     optarg, optarg, 0,
358		     optarg, optarg, optarg);
359	    for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
360	      {
361		specify_value (&group_format[i], b, "-D");
362		b += strlen (b) + 1;
363	      }
364	  }
365	  break;
366
367	case 'e':
368	  specify_style (OUTPUT_ED);
369	  break;
370
371	case 'E':
372	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
373	    ignore_white_space = IGNORE_TAB_EXPANSION;
374	  break;
375
376	case 'f':
377	  specify_style (OUTPUT_FORWARD_ED);
378	  break;
379
380	case 'F':
381	  add_regexp (&function_regexp_list, optarg);
382	  break;
383
384	case 'h':
385	  /* Split the files into chunks for faster processing.
386	     Usually does not change the result.
387
388	     This currently has no effect.  */
389	  break;
390
391	case 'H':
392	  speed_large_files = true;
393	  break;
394
395	case 'i':
396	  ignore_case = true;
397	  break;
398
399	case 'I':
400	  add_regexp (&ignore_regexp_list, optarg);
401	  break;
402
403	case 'l':
404	  if (!pr_program[0])
405	    try_help ("pagination not supported on this host", 0);
406	  paginate = true;
407#ifdef SIGCHLD
408	  /* Pagination requires forking and waiting, and
409	     System V fork+wait does not work if SIGCHLD is ignored.  */
410	  signal (SIGCHLD, SIG_DFL);
411#endif
412	  break;
413
414	case 'L':
415	  if (!file_label[0])
416	    file_label[0] = optarg;
417	  else if (!file_label[1])
418	    file_label[1] = optarg;
419	  else
420	    fatal ("too many file label options");
421	  break;
422
423	case 'n':
424	  specify_style (OUTPUT_RCS);
425	  break;
426
427	case 'N':
428	  new_file = true;
429	  break;
430
431	case 'p':
432	  show_c_function = true;
433	  add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
434	  break;
435
436	case 'P':
437	  unidirectional_new_file = true;
438	  break;
439
440	case 'q':
441	  brief = true;
442	  break;
443
444	case 'r':
445	  recursive = true;
446	  break;
447
448	case 's':
449	  report_identical_files = true;
450	  break;
451
452	case 'S':
453	  specify_value (&starting_file, optarg, "-S");
454	  break;
455
456	case 't':
457	  expand_tabs = true;
458	  break;
459
460	case 'T':
461	  initial_tab = true;
462	  break;
463
464	case 'u':
465	  specify_style (OUTPUT_UNIFIED);
466	  if (context < 3)
467	    context = 3;
468	  break;
469
470	case 'v':
471	  version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION,
472		       "Paul Eggert", "Mike Haertel", "David Hayes",
473		       "Richard Stallman", "Len Tower", (char *) 0);
474	  check_stdout ();
475	  return EXIT_SUCCESS;
476
477	case 'w':
478	  ignore_white_space = IGNORE_ALL_SPACE;
479	  break;
480
481	case 'x':
482	  add_exclude (excluded, optarg, exclude_options ());
483	  break;
484
485	case 'X':
486	  if (add_exclude_file (add_exclude, excluded, optarg,
487				exclude_options (), '\n'))
488	    pfatal_with_name (optarg);
489	  break;
490
491	case 'y':
492	  specify_style (OUTPUT_SDIFF);
493	  break;
494
495	case 'W':
496	  numval = strtoumax (optarg, &numend, 10);
497	  if (! (0 < numval && numval <= SIZE_MAX) || *numend)
498	    try_help ("invalid width `%s'", optarg);
499	  if (width != numval)
500	    {
501	      if (width)
502		fatal ("conflicting width options");
503	      width = numval;
504	    }
505	  break;
506
507	case BINARY_OPTION:
508#if HAVE_SETMODE_DOS
509	  binary = true;
510	  set_binary_mode (STDOUT_FILENO, true);
511#endif
512	  break;
513
514	case FROM_FILE_OPTION:
515	  specify_value (&from_file, optarg, "--from-file");
516	  break;
517
518	case HELP_OPTION:
519	  usage ();
520	  check_stdout ();
521	  return EXIT_SUCCESS;
522
523	case HORIZON_LINES_OPTION:
524	  numval = strtoumax (optarg, &numend, 10);
525	  if (*numend)
526	    try_help ("invalid horizon length `%s'", optarg);
527	  horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
528	  break;
529
530	case IGNORE_FILE_NAME_CASE_OPTION:
531	  ignore_file_name_case = true;
532	  break;
533
534	case INHIBIT_HUNK_MERGE_OPTION:
535	  /* This option is obsolete, but accept it for backward
536             compatibility.  */
537	  break;
538
539	case LEFT_COLUMN_OPTION:
540	  left_column = true;
541	  break;
542
543	case LINE_FORMAT_OPTION:
544	  specify_style (OUTPUT_IFDEF);
545	  for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
546	    specify_value (&line_format[i], optarg, "--line-format");
547	  break;
548
549	case NO_IGNORE_FILE_NAME_CASE_OPTION:
550	  ignore_file_name_case = false;
551	  break;
552
553	case NORMAL_OPTION:
554	  specify_style (OUTPUT_NORMAL);
555	  break;
556
557	case SDIFF_MERGE_ASSIST_OPTION:
558	  specify_style (OUTPUT_SDIFF);
559	  sdiff_merge_assist = true;
560	  break;
561
562	case STRIP_TRAILING_CR_OPTION:
563	  strip_trailing_cr = true;
564	  break;
565
566	case SUPPRESS_COMMON_LINES_OPTION:
567	  suppress_common_lines = true;
568	  break;
569
570	case TABSIZE_OPTION:
571	  numval = strtoumax (optarg, &numend, 10);
572	  if (! (0 < numval && numval <= SIZE_MAX) || *numend)
573	    try_help ("invalid tabsize `%s'", optarg);
574	  if (tabsize != numval)
575	    {
576	      if (tabsize)
577		fatal ("conflicting tabsize options");
578	      tabsize = numval;
579	    }
580	  break;
581
582	case TO_FILE_OPTION:
583	  specify_value (&to_file, optarg, "--to-file");
584	  break;
585
586	case UNCHANGED_LINE_FORMAT_OPTION:
587	case OLD_LINE_FORMAT_OPTION:
588	case NEW_LINE_FORMAT_OPTION:
589	  specify_style (OUTPUT_IFDEF);
590	  c -= UNCHANGED_LINE_FORMAT_OPTION;
591	  specify_value (&line_format[c], optarg, line_format_option[c]);
592	  break;
593
594	case UNCHANGED_GROUP_FORMAT_OPTION:
595	case OLD_GROUP_FORMAT_OPTION:
596	case NEW_GROUP_FORMAT_OPTION:
597	case CHANGED_GROUP_FORMAT_OPTION:
598	  specify_style (OUTPUT_IFDEF);
599	  c -= UNCHANGED_GROUP_FORMAT_OPTION;
600	  specify_value (&group_format[c], optarg, group_format_option[c]);
601	  break;
602
603	default:
604	  try_help (0, 0);
605	}
606      prev = c;
607    }
608
609  if (output_style == OUTPUT_UNSPECIFIED)
610    {
611      if (show_c_function)
612	{
613	  specify_style (OUTPUT_CONTEXT);
614	  if (ocontext < 0)
615	    context = 3;
616	}
617      else
618	specify_style (OUTPUT_NORMAL);
619    }
620
621  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
622    {
623#ifdef ST_MTIM_NSEC
624      time_format = "%Y-%m-%d %H:%M:%S.%N %z";
625#else
626      time_format = "%Y-%m-%d %H:%M:%S %z";
627#endif
628    }
629  else
630    {
631      /* See POSIX 1003.1-2001 for this format.  */
632      time_format = "%a %b %e %T %Y";
633    }
634
635  if (0 <= ocontext)
636    {
637      bool modern_usage = 200112 <= posix2_version ();
638
639      if ((output_style == OUTPUT_CONTEXT
640	   || output_style == OUTPUT_UNIFIED)
641	  && (context < ocontext
642	      || (ocontext < context && ! explicit_context)))
643	{
644	  if (modern_usage)
645	    {
646	      error (0, 0,
647		     _("`-%ld' option is obsolete; use `-%c %ld'"),
648		     (long int) ocontext,
649		     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
650		     (long int) ocontext);
651	      try_help (0, 0);
652	    }
653	  context = ocontext;
654	}
655      else
656	{
657	  if (modern_usage)
658	    {
659	      error (0, 0, _("`-%ld' option is obsolete; omit it"),
660		     (long int) ocontext);
661	      try_help (0, 0);
662	    }
663	}
664    }
665
666  if (! tabsize)
667    tabsize = 8;
668  if (! width)
669    width = 130;
670
671  {
672    /* Maximize first the half line width, and then the gutter width,
673       according to the following constraints:
674
675	1.  Two half lines plus a gutter must fit in a line.
676	2.  If the half line width is nonzero:
677	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
678	    b.  If tabs are not expanded to spaces,
679		a half line plus a gutter is an integral number of tabs,
680		so that tabs in the right column line up.  */
681
682    intmax_t t = expand_tabs ? 1 : tabsize;
683    intmax_t w = width;
684    intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t)  *  t;
685    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
686    sdiff_column2_offset = sdiff_half_width ? off : w;
687  }
688
689  /* Make the horizon at least as large as the context, so that
690     shift_boundaries has more freedom to shift the first and last hunks.  */
691  if (horizon_lines < context)
692    horizon_lines = context;
693
694  summarize_regexp_list (&function_regexp_list);
695  summarize_regexp_list (&ignore_regexp_list);
696
697  if (output_style == OUTPUT_IFDEF)
698    {
699      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
700	if (!line_format[i])
701	  line_format[i] = "%l\n";
702      if (!group_format[OLD])
703	group_format[OLD]
704	  = group_format[CHANGED] ? group_format[CHANGED] : "%<";
705      if (!group_format[NEW])
706	group_format[NEW]
707	  = group_format[CHANGED] ? group_format[CHANGED] : "%>";
708      if (!group_format[UNCHANGED])
709	group_format[UNCHANGED] = "%=";
710      if (!group_format[CHANGED])
711	group_format[CHANGED] = concat (group_format[OLD],
712					group_format[NEW], "");
713    }
714
715  no_diff_means_no_output =
716    (output_style == OUTPUT_IFDEF ?
717      (!*group_format[UNCHANGED]
718       || (strcmp (group_format[UNCHANGED], "%=") == 0
719	   && !*line_format[UNCHANGED]))
720     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
721
722  files_can_be_treated_as_binary =
723    (brief & binary
724     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
725	  | (ignore_regexp_list.regexps || ignore_white_space)));
726
727  switch_string = option_list (argv + 1, optind - 1);
728
729  if (from_file)
730    {
731      if (to_file)
732	fatal ("--from-file and --to-file both specified");
733      else
734	for (; optind < argc; optind++)
735	  {
736	    int status = compare_files ((struct comparison *) 0,
737					from_file, argv[optind]);
738	    if (exit_status < status)
739	      exit_status = status;
740	  }
741    }
742  else
743    {
744      if (to_file)
745	for (; optind < argc; optind++)
746	  {
747	    int status = compare_files ((struct comparison *) 0,
748					argv[optind], to_file);
749	    if (exit_status < status)
750	      exit_status = status;
751	  }
752      else
753	{
754	  if (argc - optind != 2)
755	    {
756	      if (argc - optind < 2)
757		try_help ("missing operand after `%s'", argv[argc - 1]);
758	      else
759		try_help ("extra operand `%s'", argv[optind + 2]);
760	    }
761
762	  exit_status = compare_files ((struct comparison *) 0,
763				       argv[optind], argv[optind + 1]);
764	}
765    }
766
767  /* Print any messages that were saved up for last.  */
768  print_message_queue ();
769
770  check_stdout ();
771  exit (exit_status);
772  return exit_status;
773}
774
775/* Append to REGLIST the regexp PATTERN.  */
776
777static void
778add_regexp (struct regexp_list *reglist, char const *pattern)
779{
780  size_t patlen = strlen (pattern);
781  char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
782
783  if (m != 0)
784    error (0, 0, "%s: %s", pattern, m);
785  else
786    {
787      char *regexps = reglist->regexps;
788      size_t len = reglist->len;
789      bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
790      size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
791      size_t size = reglist->size;
792
793      if (size <= newlen)
794	{
795	  if (!size)
796	    size = 1;
797
798	  do size *= 2;
799	  while (size <= newlen);
800
801	  reglist->size = size;
802	  reglist->regexps = regexps = xrealloc (regexps, size);
803	}
804      if (multiple_regexps)
805	{
806	  regexps[len++] = '\\';
807	  regexps[len++] = '|';
808	}
809      memcpy (regexps + len, pattern, patlen + 1);
810    }
811}
812
813/* Ensure that REGLIST represents the disjunction of its regexps.
814   This is done here, rather than earlier, to avoid O(N^2) behavior.  */
815
816static void
817summarize_regexp_list (struct regexp_list *reglist)
818{
819  if (reglist->regexps)
820    {
821      /* At least one regexp was specified.  Allocate a fastmap for it.  */
822      reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
823      if (reglist->multiple_regexps)
824	{
825	  /* Compile the disjunction of the regexps.
826	     (If just one regexp was specified, it is already compiled.)  */
827	  char const *m = re_compile_pattern (reglist->regexps, reglist->len,
828					      reglist->buf);
829	  if (m != 0)
830	    error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
831	}
832    }
833}
834
835static void
836try_help (char const *reason_msgid, char const *operand)
837{
838  if (reason_msgid)
839    error (0, 0, _(reason_msgid), operand);
840  error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
841	 program_name);
842  abort ();
843}
844
/* Verify that everything written to standard output got there.
   Diagnose an earlier write error recorded on stdout; otherwise close
   stdout and diagnose a failure to do so.  */

static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
853
/* Help text printed by usage, one message per element, ended by a
   null pointer.  An empty string makes usage print a blank line.
   usage passes every other element through _() when printing it;
   N_ presumably only marks the string for translation (confirm against
   the gettext setup).  usage indents by two spaces every line that is
   followed by a newline inside a message, and also the final line of a
   message when that line starts with a space or a `-'.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  N_("-i  --ignore-case  Ignore case differences in file contents."),
  N_("--ignore-file-name-case  Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case  Consider case when comparing file names."),
  N_("-E  --ignore-tab-expansion  Ignore changes due to tab expansion."),
  N_("-b  --ignore-space-change  Ignore changes in the amount of white space."),
  N_("-w  --ignore-all-space  Ignore all white space."),
  N_("-B  --ignore-blank-lines  Ignore changes whose lines are all blank."),
  N_("-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr  Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary  Read and write data in binary mode."),
#endif
  N_("-a  --text  Treat all files as text."),
  "",
  N_("-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.\n\
-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.\n\
  --label LABEL  Use LABEL instead of file name.\n\
  -p  --show-c-function  Show which C function each change is in.\n\
  -F RE  --show-function-line=RE  Show the most recent line matching RE."),
  N_("-q  --brief  Output only whether files differ."),
  N_("-e  --ed  Output an ed script."),
  N_("--normal  Output a normal diff."),
  N_("-n  --rcs  Output an RCS format diff."),
  N_("-y  --side-by-side  Output in two columns.\n\
  -W NUM  --width=NUM  Output at most NUM (default 130) print columns.\n\
  --left-column  Output only the left column of common lines.\n\
  --suppress-common-lines  Do not output common lines."),
  N_("-D NAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT  Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT."),
  N_("  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'."),
  N_("  GFMT may contain:\n\
    %<  lines from FILE1\n\
    %>  lines from FILE2\n\
    %=  lines common to FILE1 and FILE2\n\
    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
      LETTERs are as follows for new group, lower case for old group:\n\
        F  first line number\n\
        L  last line number\n\
        N  number of lines = L-F+1\n\
        E  F-1\n\
        M  L+1"),
  N_("  LFMT may contain:\n\
    %L  contents of line\n\
    %l  contents of line, excluding any trailing newline\n\
    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number"),
  N_("  Either GFMT or LFMT may contain:\n\
    %%  %\n\
    %c'C'  the single character C\n\
    %c'\\OOO'  the character with octal code OOO"),
  "",
  N_("-l  --paginate  Pass the output through `pr' to paginate it."),
  N_("-t  --expand-tabs  Expand tabs to spaces in output."),
  N_("-T  --initial-tab  Make tabs line up by prepending a tab."),
  N_("--tabsize=NUM  Tab stops are every NUM (default 8) print columns."),
  "",
  N_("-r  --recursive  Recursively compare any subdirectories found."),
  N_("-N  --new-file  Treat absent files as empty."),
  N_("--unidirectional-new-file  Treat absent first files as empty."),
  N_("-s  --report-identical-files  Report when two files are the same."),
  N_("-x PAT  --exclude=PAT  Exclude files that match PAT."),
  N_("-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE."),
  N_("-S FILE  --starting-file=FILE  Start with FILE when comparing directories."),
  N_("--from-file=FILE1  Compare FILE1 to all operands.  FILE1 can be a directory."),
  N_("--to-file=FILE2  Compare all operands to FILE2.  FILE2 can be a directory."),
  "",
  N_("--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix."),
  N_("-d  --minimal  Try hard to find a smaller set of changes."),
  N_("--speed-large-files  Assume large files and many scattered small changes."),
  "",
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  "",
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
939
940static void
941usage (void)
942{
943  char const * const *p;
944
945  printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
946
947  for (p = option_help_msgid;  *p;  p++)
948    {
949      if (!**p)
950	putchar ('\n');
951      else
952	{
953	  char const *msg = _(*p);
954	  char const *nl;
955	  while ((nl = strchr (msg, '\n')))
956	    {
957	      int msglen = nl + 1 - msg;
958	      printf ("  %.*s", msglen, msg);
959	      msg = nl + 1;
960	    }
961
962	  printf ("  %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
963	}
964    }
965}
966
/* Store VALUE in *VAR.  If *VAR already holds a different string,
   report a conflict naming OPTION and VALUE and give the --help hint
   via try_help.  Setting the same value again is accepted.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;

  if (previous != 0 && strcmp (previous, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
979
980/* Set the output style to STYLE, diagnosing conflicts.  */
981static void
982specify_style (enum output_style style)
983{
984  if (output_style != style)
985    {
986      if (output_style != OUTPUT_UNSPECIFIED)
987	try_help ("conflicting output style options", 0);
988      output_style = style;
989    }
990}
991
/* Overwrite the last-modified time of *ST with the current time.

   When ST_MTIM_NSEC is defined, clock_gettime and then gettimeofday
   are tried first, each only if configured in, and the first one that
   succeeds supplies the time.  Otherwise, or if both fail, time is
   used.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, 0) == 0)
      {
	/* Convert microseconds to nanoseconds.  */
	st->st_mtime = now.tv_sec;
	st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
	return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Fallback.  */
  time (&st->st_mtime);
}
1020
1021/* Compare two files (or dirs) with parent comparison PARENT
1022   and names NAME0 and NAME1.
1023   (If PARENT is 0, then the first name is just NAME0, etc.)
1024   This is self-contained; it opens the files and closes them.
1025
1026   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1027   different, EXIT_TROUBLE if there is a problem opening them.  */
1028
1029static int
1030compare_files (struct comparison const *parent,
1031	       char const *name0,
1032	       char const *name1)
1033{
1034  struct comparison cmp;
1035#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1036  register int f;
1037  int status = EXIT_SUCCESS;
1038  bool same_files;
1039  char *free0, *free1;
1040
1041  /* If this is directory comparison, perhaps we have a file
1042     that exists only in one of the directories.
1043     If so, just print a message to that effect.  */
1044
1045  if (! ((name0 && name1)
1046	 || (unidirectional_new_file && name1)
1047	 || new_file))
1048    {
1049      char const *name = name0 == 0 ? name1 : name0;
1050      char const *dir = parent->file[name0 == 0].name;
1051
1052      /* See POSIX 1003.1-2001 for this format.  */
1053      message ("Only in %s: %s\n", dir, name);
1054
1055      /* Return EXIT_FAILURE so that diff_dirs will return
1056	 EXIT_FAILURE ("some files differ").  */
1057      return EXIT_FAILURE;
1058    }
1059
1060  memset (cmp.file, 0, sizeof cmp.file);
1061  cmp.parent = parent;
1062
1063  /* cmp.file[f].desc markers */
1064#define NONEXISTENT (-1) /* nonexistent file */
1065#define UNOPENED (-2) /* unopened file (e.g. directory) */
1066#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1067
1068#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1069
1070  cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1071  cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1072
1073  /* Now record the full name of each file, including nonexistent ones.  */
1074
1075  if (name0 == 0)
1076    name0 = name1;
1077  if (name1 == 0)
1078    name1 = name0;
1079
1080  if (!parent)
1081    {
1082      free0 = 0;
1083      free1 = 0;
1084      cmp.file[0].name = name0;
1085      cmp.file[1].name = name1;
1086    }
1087  else
1088    {
1089      cmp.file[0].name = free0
1090	= dir_file_pathname (parent->file[0].name, name0);
1091      cmp.file[1].name = free1
1092	= dir_file_pathname (parent->file[1].name, name1);
1093    }
1094
1095  /* Stat the files.  */
1096
1097  for (f = 0; f < 2; f++)
1098    {
1099      if (cmp.file[f].desc != NONEXISTENT)
1100	{
1101	  if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1102	    {
1103	      cmp.file[f].desc = cmp.file[0].desc;
1104	      cmp.file[f].stat = cmp.file[0].stat;
1105	    }
1106	  else if (strcmp (cmp.file[f].name, "-") == 0)
1107	    {
1108	      cmp.file[f].desc = STDIN_FILENO;
1109	      if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1110		cmp.file[f].desc = ERRNO_ENCODE (errno);
1111	      else
1112		{
1113		  if (S_ISREG (cmp.file[f].stat.st_mode))
1114		    {
1115		      off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1116		      if (pos < 0)
1117			cmp.file[f].desc = ERRNO_ENCODE (errno);
1118		      else
1119			cmp.file[f].stat.st_size =
1120			  MAX (0, cmp.file[f].stat.st_size - pos);
1121		    }
1122
1123		  /* POSIX 1003.1-2001 requires current time for
1124		     stdin.  */
1125		  set_mtime_to_now (&cmp.file[f].stat);
1126		}
1127	    }
1128	  else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1129	    cmp.file[f].desc = ERRNO_ENCODE (errno);
1130	}
1131    }
1132
1133  /* Mark files as nonexistent as needed for -N and -P, if they are
1134     inaccessible empty regular files (the kind of files that 'patch'
1135     creates to indicate nonexistent backups), or if they are
1136     top-level files that do not exist but their counterparts do
1137     exist.  */
1138  for (f = 0; f < 2; f++)
1139    if ((new_file || (f == 0 && unidirectional_new_file))
1140	&& (cmp.file[f].desc == UNOPENED
1141	    ? (S_ISREG (cmp.file[f].stat.st_mode)
1142	       && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1143	       && cmp.file[f].stat.st_size == 0)
1144	    : (cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1145	       && ! parent
1146	       && cmp.file[1 - f].desc == UNOPENED)))
1147      cmp.file[f].desc = NONEXISTENT;
1148
1149  for (f = 0; f < 2; f++)
1150    if (cmp.file[f].desc == NONEXISTENT)
1151      {
1152	memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1153	cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1154      }
1155
1156  for (f = 0; f < 2; f++)
1157    {
1158      int e = ERRNO_DECODE (cmp.file[f].desc);
1159      if (0 <= e)
1160	{
1161	  errno = e;
1162	  perror_with_name (cmp.file[f].name);
1163	  status = EXIT_TROUBLE;
1164	}
1165    }
1166
1167  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1168    {
1169      /* If one is a directory, and it was specified in the command line,
1170	 use the file in that dir with the other file's basename.  */
1171
1172      int fnm_arg = DIR_P (0);
1173      int dir_arg = 1 - fnm_arg;
1174      char const *fnm = cmp.file[fnm_arg].name;
1175      char const *dir = cmp.file[dir_arg].name;
1176      char const *filename = cmp.file[dir_arg].name = free0
1177	= dir_file_pathname (dir, base_name (fnm));
1178
1179      if (strcmp (fnm, "-") == 0)
1180	fatal ("cannot compare `-' to a directory");
1181
1182      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1183	{
1184	  perror_with_name (filename);
1185	  status = EXIT_TROUBLE;
1186	}
1187    }
1188
1189  if (status != EXIT_SUCCESS)
1190    {
1191      /* One of the files should exist but does not.  */
1192    }
1193  else if (cmp.file[0].desc == NONEXISTENT
1194	   && cmp.file[1].desc == NONEXISTENT)
1195    {
1196      /* Neither file "exists", so there's nothing to compare.  */
1197    }
1198  else if ((same_files
1199	    = (cmp.file[0].desc != NONEXISTENT
1200	       && cmp.file[1].desc != NONEXISTENT
1201	       && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1202	       && same_file_attributes (&cmp.file[0].stat,
1203					&cmp.file[1].stat)))
1204	   && no_diff_means_no_output)
1205    {
1206      /* The two named files are actually the same physical file.
1207	 We know they are identical without actually reading them.  */
1208    }
1209  else if (DIR_P (0) & DIR_P (1))
1210    {
1211      if (output_style == OUTPUT_IFDEF)
1212	fatal ("-D option not supported with directories");
1213
1214      /* If both are directories, compare the files in them.  */
1215
1216      if (parent && !recursive)
1217	{
1218	  /* But don't compare dir contents one level down
1219	     unless -r was specified.
1220	     See POSIX 1003.1-2001 for this format.  */
1221	  message ("Common subdirectories: %s and %s\n",
1222		   cmp.file[0].name, cmp.file[1].name);
1223	}
1224      else
1225	status = diff_dirs (&cmp, compare_files);
1226    }
1227  else if ((DIR_P (0) | DIR_P (1))
1228	   || (parent
1229	       && (! S_ISREG (cmp.file[0].stat.st_mode)
1230		   || ! S_ISREG (cmp.file[1].stat.st_mode))))
1231    {
1232      if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1233	{
1234	  /* We have a subdirectory that exists only in one directory.  */
1235
1236	  if ((DIR_P (0) | DIR_P (1))
1237	      && recursive
1238	      && (new_file
1239		  || (unidirectional_new_file
1240		      && cmp.file[0].desc == NONEXISTENT)))
1241	    status = diff_dirs (&cmp, compare_files);
1242	  else
1243	    {
1244	      char const *dir
1245		= parent->file[cmp.file[0].desc == NONEXISTENT].name;
1246
1247	      /* See POSIX 1003.1-2001 for this format.  */
1248	      message ("Only in %s: %s\n", dir, name0);
1249
1250	      status = EXIT_FAILURE;
1251	    }
1252	}
1253      else
1254	{
1255	  /* We have two files that are not to be compared.  */
1256
1257	  /* See POSIX 1003.1-2001 for this format.  */
1258	  message5 ("File %s is a %s while file %s is a %s\n",
1259		    file_label[0] ? file_label[0] : cmp.file[0].name,
1260		    file_type (&cmp.file[0].stat),
1261		    file_label[1] ? file_label[1] : cmp.file[1].name,
1262		    file_type (&cmp.file[1].stat));
1263
1264	  /* This is a difference.  */
1265	  status = EXIT_FAILURE;
1266	}
1267    }
1268  else if (files_can_be_treated_as_binary
1269	   && S_ISREG (cmp.file[0].stat.st_mode)
1270	   && S_ISREG (cmp.file[1].stat.st_mode)
1271	   && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
1272    {
1273      message ("Files %s and %s differ\n",
1274	       file_label[0] ? file_label[0] : cmp.file[0].name,
1275	       file_label[1] ? file_label[1] : cmp.file[1].name);
1276      status = EXIT_FAILURE;
1277    }
1278  else
1279    {
1280      /* Both exist and neither is a directory.  */
1281
1282      /* Open the files and record their descriptors.  */
1283
1284      if (cmp.file[0].desc == UNOPENED)
1285	if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1286	  {
1287	    perror_with_name (cmp.file[0].name);
1288	    status = EXIT_TROUBLE;
1289	  }
1290      if (cmp.file[1].desc == UNOPENED)
1291	{
1292	  if (same_files)
1293	    cmp.file[1].desc = cmp.file[0].desc;
1294	  else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1295		   < 0)
1296	    {
1297	      perror_with_name (cmp.file[1].name);
1298	      status = EXIT_TROUBLE;
1299	    }
1300	}
1301
1302#if HAVE_SETMODE_DOS
1303      if (binary)
1304	for (f = 0; f < 2; f++)
1305	  if (0 <= cmp.file[f].desc)
1306	    set_binary_mode (cmp.file[f].desc, true);
1307#endif
1308
1309      /* Compare the files, if no error was found.  */
1310
1311      if (status == EXIT_SUCCESS)
1312	status = diff_2_files (&cmp);
1313
1314      /* Close the file descriptors.  */
1315
1316      if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1317	{
1318	  perror_with_name (cmp.file[0].name);
1319	  status = EXIT_TROUBLE;
1320	}
1321      if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1322	  && close (cmp.file[1].desc) != 0)
1323	{
1324	  perror_with_name (cmp.file[1].name);
1325	  status = EXIT_TROUBLE;
1326	}
1327    }
1328
1329  /* Now the comparison has been done, if no error prevented it,
1330     and STATUS is the value this function will return.  */
1331
1332  if (status == EXIT_SUCCESS)
1333    {
1334      if (report_identical_files && !DIR_P (0))
1335	message ("Files %s and %s are identical\n",
1336		 file_label[0] ? file_label[0] : cmp.file[0].name,
1337		 file_label[1] ? file_label[1] : cmp.file[1].name);
1338    }
1339  else
1340    {
1341      /* Flush stdout so that the user sees differences immediately.
1342	 This can hurt performance, unfortunately.  */
1343      if (fflush (stdout) != 0)
1344	pfatal_with_name (_("standard output"));
1345    }
1346
1347  if (free0)
1348    free (free0);
1349  if (free1)
1350    free (free1);
1351
1352  return status;
1353}
1354