1/*	$NetBSD: diff.c,v 1.2 2006/01/14 09:18:17 apb Exp $	*/
2
3/* diff - compare files line by line
4
5   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
6   Free Software Foundation, Inc.
7
8   This file is part of GNU DIFF.
9
10   GNU DIFF is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 2, or (at your option)
13   any later version.
14
15   GNU DIFF is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18   See the GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with GNU DIFF; see the file COPYING.
22   If not, write to the Free Software Foundation,
23   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24
25#define GDIFF_MAIN
26#include "diff.h"
27#include <c-stack.h>
28#include <dirname.h>
29#include <error.h>
30#include <exclude.h>
31#include <exitfail.h>
32#include <fnmatch.h>
33#include <freesoft.h>
34#include <getopt.h>
35#include <hard-locale.h>
36#include <prepargs.h>
37#include <quotesys.h>
38#include <regex.h>
39#include <setmode.h>
40#include <xalloc.h>
41#include <posixver.h>
42
/* Credits printed by `-v' / `--version'.  The text is wrapped in N_
   here and passed through _() at the point where main prints it.  */
static char const authorship_msgid[] =
  N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
Richard Stallman, and Len Tower.");

/* Copyright line printed by `-v' / `--version'; main prints it
   without passing it through _().  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";
49
/* Smallest gutter, in columns, allowed between the two halves of
   side-by-side output when the half line width is nonzero (see the
   width computation in main).  A definition supplied before this
   point takes precedence.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
53
/* The regexps given for one option, accumulated as a single string so
   that they can be compiled together as one disjunction.  Filled in by
   add_regexp and finished by summarize_regexp_list.  */
struct regexp_list
{
  char *regexps;	/* chars representing disjunction of the regexps */
  size_t len;		/* chars used in `regexps' */
  size_t size;		/* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* pattern buffer the regexps are compiled into */
};
62
/* Forward declarations of file-local functions that are used before
   their definitions.  try_help is declared noreturn; its definition
   ends in abort ().  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
71
/* If comparing directories, compare their common subdirectories
   recursively (-r).  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps
   (-F, plus the built-in pattern added by -p).  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps (-I).  */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (-P, --unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;
102
103
/* Build a string holding the command options diff was invoked with.

   OPTIONVEC is a vector of COUNT separate ARGV-elements.  Each element
   is written through quote_system_arg and preceded by one space, so
   the result begins with a space, has no trailing space, and is the
   empty string when COUNT is zero.  The result is obtained from
   xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;	/* room for the terminating null byte */
  char *buffer;
  char *out;
  int k;

  /* Measuring pass: quote_system_arg is called with a null
     destination and its return value is taken as the quoted length.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  buffer = xmalloc (needed);
  out = buffer;

  /* Writing pass: a separating space, then the quoted element.  */
  for (k = 0; k < count; k++)
    {
      *out = ' ';
      out += 1 + quote_system_arg (out + 1, optionvec[k]);
    }

  *out = '\0';
  return buffer;
}
134
135
136/* Return an option value suitable for add_exclude.  */
137
138static int
139exclude_options (void)
140{
141  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
142}
143
/* Single-letter options handed to getopt_long.  A letter followed by
   `:' takes an argument; the digits spell the obsolete `-NUM' context
   length handled in main.  */
static char const shortopts[] =
  "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";

/* Codes for long options that have no single-letter equivalent.
   They start just above CHAR_MAX so that none of them can equal an
   option character.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* Keep these three consecutive and in this order: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION to get an index into
     line_format_option.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* Keep these four consecutive and in this order: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION to get an index into
     group_format_option.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

/* Spellings of the group-format options, in enum order, for use in
   diagnostics.  Each row is as wide as the longest spelling.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
{
  "--unchanged-group-format",
  "--old-group-format",
  "--new-group-format",
  "--changed-group-format"
};

/* Spellings of the line-format options, in enum order, for use in
   diagnostics.  Each row is as wide as the longest spelling.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
{
  "--unchanged-line-format",
  "--old-line-format",
  "--new-line-format"
};

/* Long options handed to getopt_long, sorted by name.  No entry uses
   a flag pointer, so getopt_long returns the last field directly.  */
static struct option const longopts[] =
{
  { "binary",                   no_argument,       NULL, BINARY_OPTION },
  { "brief",                    no_argument,       NULL, 'q' },
  { "changed-group-format",     required_argument, NULL, CHANGED_GROUP_FORMAT_OPTION },
  { "context",                  optional_argument, NULL, 'C' },
  { "ed",                       no_argument,       NULL, 'e' },
  { "exclude",                  required_argument, NULL, 'x' },
  { "exclude-from",             required_argument, NULL, 'X' },
  { "expand-tabs",              no_argument,       NULL, 't' },
  { "forward-ed",               no_argument,       NULL, 'f' },
  { "from-file",                required_argument, NULL, FROM_FILE_OPTION },
  { "help",                     no_argument,       NULL, HELP_OPTION },
  { "horizon-lines",            required_argument, NULL, HORIZON_LINES_OPTION },
  { "ifdef",                    required_argument, NULL, 'D' },
  { "ignore-all-space",         no_argument,       NULL, 'w' },
  { "ignore-blank-lines",       no_argument,       NULL, 'B' },
  { "ignore-case",              no_argument,       NULL, 'i' },
  { "ignore-file-name-case",    no_argument,       NULL, IGNORE_FILE_NAME_CASE_OPTION },
  { "ignore-matching-lines",    required_argument, NULL, 'I' },
  { "ignore-space-change",      no_argument,       NULL, 'b' },
  { "ignore-tab-expansion",     no_argument,       NULL, 'E' },
  { "inhibit-hunk-merge",       no_argument,       NULL, INHIBIT_HUNK_MERGE_OPTION },
  { "initial-tab",              no_argument,       NULL, 'T' },
  { "label",                    required_argument, NULL, 'L' },
  { "left-column",              no_argument,       NULL, LEFT_COLUMN_OPTION },
  { "line-format",              required_argument, NULL, LINE_FORMAT_OPTION },
  { "minimal",                  no_argument,       NULL, 'd' },
  { "new-file",                 no_argument,       NULL, 'N' },
  { "new-group-format",         required_argument, NULL, NEW_GROUP_FORMAT_OPTION },
  { "new-line-format",          required_argument, NULL, NEW_LINE_FORMAT_OPTION },
  { "no-ignore-file-name-case", no_argument,       NULL, NO_IGNORE_FILE_NAME_CASE_OPTION },
  { "normal",                   no_argument,       NULL, NORMAL_OPTION },
  { "old-group-format",         required_argument, NULL, OLD_GROUP_FORMAT_OPTION },
  { "old-line-format",          required_argument, NULL, OLD_LINE_FORMAT_OPTION },
  { "paginate",                 no_argument,       NULL, 'l' },
  { "rcs",                      no_argument,       NULL, 'n' },
  { "recursive",                no_argument,       NULL, 'r' },
  { "report-identical-files",   no_argument,       NULL, 's' },
  { "sdiff-merge-assist",       no_argument,       NULL, SDIFF_MERGE_ASSIST_OPTION },
  { "show-c-function",          no_argument,       NULL, 'p' },
  { "show-function-line",       required_argument, NULL, 'F' },
  { "side-by-side",             no_argument,       NULL, 'y' },
  { "speed-large-files",        no_argument,       NULL, 'H' },
  { "starting-file",            required_argument, NULL, 'S' },
  { "strip-trailing-cr",        no_argument,       NULL, STRIP_TRAILING_CR_OPTION },
  { "suppress-common-lines",    no_argument,       NULL, SUPPRESS_COMMON_LINES_OPTION },
  { "text",                     no_argument,       NULL, 'a' },
  { "to-file",                  required_argument, NULL, TO_FILE_OPTION },
  { "unchanged-group-format",   required_argument, NULL, UNCHANGED_GROUP_FORMAT_OPTION },
  { "unchanged-line-format",    required_argument, NULL, UNCHANGED_LINE_FORMAT_OPTION },
  { "unidirectional-new-file",  no_argument,       NULL, 'P' },
  { "unified",                  optional_argument, NULL, 'U' },
  { "version",                  no_argument,       NULL, 'v' },
  { "width",                    required_argument, NULL, 'W' },
  { NULL, 0, NULL, 0 }
};
249
/* Program entry point: decode the command line, derive the global
   comparison settings from the options, then compare the operands.

   The exit status is the largest status returned by compare_files
   over all the comparisons made (starting from EXIT_SUCCESS).  Usage
   errors are reported through try_help; `--help' and `-v' print their
   text and return EXIT_SUCCESS.  */
int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;	/* option code seen on the previous iteration */
  lin ocontext = -1;	/* value of obsolete `-NUM', or -1 if not given */
  bool explicit_context = 0;	/* was -C or -U given?  */
  int width = 0;	/* value of -W, or 0 if not given */
  bool show_c_function = 0;
  char const *from_file = 0;
  char const *to_file = 0;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = 2;
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (c_stack_die);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
    {
      switch (c)
	{
	case 0:
	  break;

	/* Obsolete `-NUM' context length.  Consecutive digit options
	   are accumulated into OCONTEXT as one decimal number, which
	   saturates at LIN_MAX; a digit that does not follow another
	   digit starts a new number.  */
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	  if (! ISDIGIT (prev))
	    ocontext = c - '0';
	  else if (LIN_MAX / 10 < ocontext
		   || ((ocontext = 10 * ocontext + c - '0') < 0))
	    ocontext = LIN_MAX;
	  break;

	case 'a':
	  text = 1;
	  break;

	case 'b':
	  /* Only raise the white space setting; never lower one that
	     an earlier option already set higher.  */
	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
	    ignore_white_space = IGNORE_SPACE_CHANGE;
	  break;

	case 'B':
	  ignore_blank_lines = 1;
	  break;

	case 'C':		/* +context[=lines] */
	case 'U':		/* +unified[=lines] */
	  {
	    /* The argument is optional for the long forms; without it
	       the length is 3.  Values above LIN_MAX are clamped, and
	       the largest length requested so far is kept.  */
	    if (optarg)
	      {
		numval = strtoumax (optarg, &numend, 10);
		if (*numend)
		  try_help ("invalid context length `%s'", optarg);
		if (LIN_MAX < numval)
		  numval = LIN_MAX;
	      }
	    else
	      numval = 3;

	    specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
	    if (context < numval)
	      context = numval;
	    explicit_context = 1;
	  }
	  break;

	case 'c':
	  specify_style (OUTPUT_CONTEXT);
	  if (context < 3)
	    context = 3;
	  break;

	case 'd':
	  minimal = 1;
	  break;

	case 'D':
	  specify_style (OUTPUT_IFDEF);
	  {
	    /* Format all the group formats into one buffer.  Each %c
	       is passed 0, so it writes a null byte that ends one
	       format; the loop below then walks the buffer piece by
	       piece, giving each group_format entry its own string.
	       The allocation size is the template size adjusted for
	       what each conversion expands to.  */
	    static char const C_ifdef_group_formats[] =
	      "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
	    char *b = xmalloc (sizeof C_ifdef_group_formats
			       + 7 * strlen (optarg) - 14 /* 7*"%s" */
			       - 8 /* 5*"%%" + 3*"%c" */);
	    sprintf (b, C_ifdef_group_formats,
		     0,
		     optarg, optarg, 0,
		     optarg, optarg, 0,
		     optarg, optarg, optarg);
	    for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
	      {
		specify_value (&group_format[i], b, "-D");
		b += strlen (b) + 1;
	      }
	  }
	  break;

	case 'e':
	  specify_style (OUTPUT_ED);
	  break;

	case 'E':
	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
	    ignore_white_space = IGNORE_TAB_EXPANSION;
	  break;

	case 'f':
	  specify_style (OUTPUT_FORWARD_ED);
	  break;

	case 'F':
	  add_regexp (&function_regexp_list, optarg);
	  break;

	case 'h':
	  /* Split the files into chunks for faster processing.
	     Usually does not change the result.

	     This currently has no effect.  */
	  break;

	case 'H':
	  speed_large_files = 1;
	  break;

	case 'i':
	  ignore_case = 1;
	  break;

	case 'I':
	  add_regexp (&ignore_regexp_list, optarg);
	  break;

	case 'l':
	  if (!pr_program[0])
	    try_help ("pagination not supported on this host", 0);
	  paginate = 1;
#ifdef SIGCHLD
	  /* Pagination requires forking and waiting, and
	     System V fork+wait does not work if SIGCHLD is ignored.  */
	  signal (SIGCHLD, SIG_DFL);
#endif
	  break;

	case 'L':
	  /* The first -L labels the first file, the second -L the
	     second file; a third one is an error.  */
	  if (!file_label[0])
	    file_label[0] = optarg;
	  else if (!file_label[1])
	    file_label[1] = optarg;
	  else
	    fatal ("too many file label options");
	  break;

	case 'n':
	  specify_style (OUTPUT_RCS);
	  break;

	case 'N':
	  new_file = 1;
	  break;

	case 'p':
	  /* -p acts like -F with a regexp matching lines that begin
	     with a letter, `$' or `_'.  */
	  show_c_function = 1;
	  add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
	  break;

	case 'P':
	  unidirectional_new_file = 1;
	  break;

	case 'q':
	  brief = 1;
	  break;

	case 'r':
	  recursive = 1;
	  break;

	case 's':
	  report_identical_files = 1;
	  break;

	case 'S':
	  specify_value (&starting_file, optarg, "-S");
	  break;

	case 't':
	  expand_tabs = 1;
	  break;

	case 'T':
	  initial_tab = 1;
	  break;

	case 'u':
	  specify_style (OUTPUT_UNIFIED);
	  if (context < 3)
	    context = 3;
	  break;

	case 'v':
	  printf ("diff %s\n%s\n\n%s\n\n%s\n",
		  version_string, copyright_string,
		  _(free_software_msgid), _(authorship_msgid));
	  check_stdout ();
	  return EXIT_SUCCESS;

	case 'w':
	  ignore_white_space = IGNORE_ALL_SPACE;
	  break;

	case 'x':
	  add_exclude (excluded, optarg, exclude_options ());
	  break;

	case 'X':
	  if (add_exclude_file (add_exclude, excluded, optarg,
				exclude_options (), '\n'))
	    pfatal_with_name (optarg);
	  break;

	case 'y':
	  specify_style (OUTPUT_SDIFF);
	  break;

	case 'W':
	  /* The width must be in 1..INT_MAX.  -W may be repeated only
	     with the same value.  */
	  numval = strtoumax (optarg, &numend, 10);
	  if (! (0 < numval && numval <= INT_MAX) || *numend)
	    try_help ("invalid width `%s'", optarg);
	  if (width != numval)
	    {
	      if (width)
		fatal ("conflicting width options");
	      width = numval;
	    }
	  break;

	case BINARY_OPTION:
#if HAVE_SETMODE_DOS
	  binary = 1;
	  set_binary_mode (STDOUT_FILENO, 1);
#endif
	  break;

	case FROM_FILE_OPTION:
	  specify_value (&from_file, optarg, "--from-file");
	  break;

	case HELP_OPTION:
	  usage ();
	  check_stdout ();
	  return EXIT_SUCCESS;

	case HORIZON_LINES_OPTION:
	  /* Keep the largest horizon requested, clamped to LIN_MAX.  */
	  numval = strtoumax (optarg, &numend, 10);
	  if (*numend)
	    try_help ("invalid horizon length `%s'", optarg);
	  horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
	  break;

	case IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = 1;
	  break;

	case INHIBIT_HUNK_MERGE_OPTION:
	  /* This option is obsolete, but accept it for backward
             compatibility.  */
	  break;

	case LEFT_COLUMN_OPTION:
	  left_column = 1;
	  break;

	case LINE_FORMAT_OPTION:
	  /* --line-format sets every line format to the same value.  */
	  specify_style (OUTPUT_IFDEF);
	  for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
	    specify_value (&line_format[i], optarg, "--line-format");
	  break;

	case NO_IGNORE_FILE_NAME_CASE_OPTION:
	  ignore_file_name_case = 0;
	  break;

	case NORMAL_OPTION:
	  specify_style (OUTPUT_NORMAL);
	  break;

	case SDIFF_MERGE_ASSIST_OPTION:
	  specify_style (OUTPUT_SDIFF);
	  sdiff_merge_assist = 1;
	  break;

	case STRIP_TRAILING_CR_OPTION:
	  strip_trailing_cr = 1;
	  break;

	case SUPPRESS_COMMON_LINES_OPTION:
	  suppress_common_lines = 1;
	  break;

	case TO_FILE_OPTION:
	  specify_value (&to_file, optarg, "--to-file");
	  break;

	case UNCHANGED_LINE_FORMAT_OPTION:
	case OLD_LINE_FORMAT_OPTION:
	case NEW_LINE_FORMAT_OPTION:
	  /* Turn the option code into an index; this relies on the
	     three codes being consecutive.  */
	  specify_style (OUTPUT_IFDEF);
	  c -= UNCHANGED_LINE_FORMAT_OPTION;
	  specify_value (&line_format[c], optarg, line_format_option[c]);
	  break;

	case UNCHANGED_GROUP_FORMAT_OPTION:
	case OLD_GROUP_FORMAT_OPTION:
	case NEW_GROUP_FORMAT_OPTION:
	case CHANGED_GROUP_FORMAT_OPTION:
	  /* Turn the option code into an index; this relies on the
	     four codes being consecutive.  */
	  specify_style (OUTPUT_IFDEF);
	  c -= UNCHANGED_GROUP_FORMAT_OPTION;
	  specify_value (&group_format[c], optarg, group_format_option[c]);
	  break;

	default:
	  try_help (0, 0);
	}
      prev = c;
    }

  /* If no output style was chosen, -p implies context output (with 3
     lines of context unless `-NUM' was given); otherwise use the
     normal style.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
	{
	  specify_style (OUTPUT_CONTEXT);
	  if (ocontext < 0)
	    context = 3;
	}
      else
	specify_style (OUTPUT_NORMAL);
    }

  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    time_format = "%Y-%m-%d %H:%M:%S.%N %z";
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Reconcile the obsolete `-NUM' with the other context options.
     It takes effect for context and unified output when it asks for
     more context than is already set, or for less when no -C/-U gave
     an explicit length.  When posix2_version reports 200112 or later,
     any use of `-NUM' is rejected instead.  */
  if (0 <= ocontext)
    {
      bool modern_usage = 200112 <= posix2_version ();

      if ((output_style == OUTPUT_CONTEXT
	   || output_style == OUTPUT_UNIFIED)
	  && (context < ocontext
	      || (ocontext < context && ! explicit_context)))
	{
	  if (modern_usage)
	    {
	      error (0, 0,
		     _("`-%ld' option is obsolete; use `-%c %ld'"),
		     (long) ocontext,
		     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
		     (long) ocontext);
	      try_help (0, 0);
	    }
	  context = ocontext;
	}
      else
	{
	  if (modern_usage)
	    {
	      error (0, 0, _("`-%ld' option is obsolete; omit it"),
		     (long) ocontext);
	      try_help (0, 0);
	    }
	}
    }

  {
    /*
     *	We maximize first the half line width, and then the gutter width,
     *	according to the following constraints:
     *	1.  Two half lines plus a gutter must fit in a line.
     *	2.  If the half line width is nonzero:
     *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
     *	    b.  If tabs are not expanded to spaces,
     *		a half line plus a gutter is an integral number of tabs,
     *		so that tabs in the right column line up.
     */
    unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
    int w = width ? width : 130;	/* 130 columns unless -W was given */
    int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t)  *  t;
    /* The next two assignments are joined by a comma operator and so
       form a single statement.  */
    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in the line and group formats that were not given
     explicitly.  A missing OLD or NEW group format falls back to the
     CHANGED one if that was given; a missing CHANGED format is the
     concatenation of the OLD and NEW formats.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
	if (!line_format[i])
	  line_format[i] = "%l\n";
      if (!group_format[OLD])
	group_format[OLD]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
	group_format[NEW]
	  = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
	group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
	group_format[CHANGED] = concat (group_format[OLD],
					group_format[NEW], "");
    }

  /* For ifdef-style output this holds when the unchanged group format
     is empty, or is "%=" with an empty unchanged line format.  For the
     other styles it holds unless the style is side-by-side without
     --suppress-common-lines.  */
  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (strcmp (group_format[UNCHANGED], "%=") == 0
	   && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* Only with -q, and only when none of the options that ignore some
     kind of difference is in effect.  */
  files_can_be_treated_as_binary =
    (brief
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
	  | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Save the ARGV-elements that precede argv[optind] as one string.  */
  switch_string = option_list (argv + 1, optind - 1);

  /* With --from-file or --to-file, every operand is compared against
     that file; giving both is an error.  Otherwise exactly two
     operands are required.  */
  if (from_file)
    {
      if (to_file)
	fatal ("--from-file and --to-file both specified");
      else
	for (; optind < argc; optind++)
	  {
	    int status = compare_files ((struct comparison *) 0,
					from_file, argv[optind]);
	    if (exit_status < status)
	      exit_status = status;
	  }
    }
  else
    {
      if (to_file)
	for (; optind < argc; optind++)
	  {
	    int status = compare_files ((struct comparison *) 0,
					argv[optind], to_file);
	    if (exit_status < status)
	      exit_status = status;
	  }
      else
	{
	  if (argc - optind != 2)
	    {
	      if (argc - optind < 2)
		try_help ("missing operand after `%s'", argv[argc - 1]);
	      else
		try_help ("extra operand `%s'", argv[optind + 2]);
	    }

	  exit_status = compare_files ((struct comparison *) 0,
				       argv[optind], argv[optind + 1]);
	}
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}
754
755/* Append to REGLIST the regexp PATTERN.  */
756
757static void
758add_regexp (struct regexp_list *reglist, char const *pattern)
759{
760  size_t patlen = strlen (pattern);
761  char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
762
763  if (m != 0)
764    error (0, 0, "%s: %s", pattern, m);
765  else
766    {
767      char *regexps = reglist->regexps;
768      size_t len = reglist->len;
769      bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
770      size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
771      size_t size = reglist->size;
772
773      if (size <= newlen)
774	{
775	  if (!size)
776	    size = 1;
777
778	  do size *= 2;
779	  while (size <= newlen);
780
781	  reglist->size = size;
782	  reglist->regexps = regexps = xrealloc (regexps, size);
783	}
784      if (multiple_regexps)
785	{
786	  regexps[len++] = '\\';
787	  regexps[len++] = '|';
788	}
789      memcpy (regexps + len, pattern, patlen + 1);
790    }
791}
792
793/* Ensure that REGLIST represents the disjunction of its regexps.
794   This is done here, rather than earlier, to avoid O(N^2) behavior.  */
795
796static void
797summarize_regexp_list (struct regexp_list *reglist)
798{
799  if (reglist->regexps)
800    {
801      /* At least one regexp was specified.  Allocate a fastmap for it.  */
802      reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
803      if (reglist->multiple_regexps)
804	{
805	  /* Compile the disjunction of the regexps.
806	     (If just one regexp was specified, it is already compiled.)  */
807	  char const *m = re_compile_pattern (reglist->regexps, reglist->len,
808					      reglist->buf);
809	  if (m != 0)
810	    error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
811	}
812    }
813}
814
815static void
816try_help (char const *reason_msgid, char const *operand)
817{
818  if (reason_msgid)
819    error (0, 0, _(reason_msgid), operand);
820  error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
821	 program_name);
822  abort ();
823}
824
/* Check that everything written to standard output got there: report
   a pending write error, or else close stdout and report a failure
   to close it.  */

static void
check_stdout (void)
{
  if (ferror (stdout))
    fatal ("write failed");
  else
    {
      int close_status = fclose (stdout);

      if (close_status != 0)
	pfatal_with_name (_("standard output"));
    }
}
833
/* The text of `--help', one entry per message, ended by a null
   pointer.  usage prints the entries in order: an empty string
   produces a blank line, and any other entry is translated and then
   printed.  Within an entry, each piece that ends in a newline is
   printed with a two-space indent; the final piece is indented the
   same way only if it begins with a space or `-'.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  N_("-i  --ignore-case  Ignore case differences in file contents."),
  N_("--ignore-file-name-case  Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case  Consider case when comparing file names."),
  N_("-E  --ignore-tab-expansion  Ignore changes due to tab expansion."),
  N_("-b  --ignore-space-change  Ignore changes in the amount of white space."),
  N_("-w  --ignore-all-space  Ignore all white space."),
  N_("-B  --ignore-blank-lines  Ignore changes whose lines are all blank."),
  N_("-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr  Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary  Read and write data in binary mode."),
#endif
  N_("-a  --text  Treat all files as text."),
  "",
  N_("-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.\n\
-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.\n\
  --label LABEL  Use LABEL instead of file name.\n\
  -p  --show-c-function  Show which C function each change is in.\n\
  -F RE  --show-function-line=RE  Show the most recent line matching RE."),
  N_("-q  --brief  Output only whether files differ."),
  N_("-e  --ed  Output an ed script."),
  N_("--normal  Output a normal diff."),
  N_("-n  --rcs  Output an RCS format diff."),
  N_("-y  --side-by-side  Output in two columns.\n\
  -W NUM  --width=NUM  Output at most NUM (default 130) print columns.\n\
  --left-column  Output only the left column of common lines.\n\
  --suppress-common-lines  Do not output common lines."),
  N_("-D NAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT  Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT."),
  N_("  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'."),
  N_("  GFMT may contain:\n\
    %<  lines from FILE1\n\
    %>  lines from FILE2\n\
    %=  lines common to FILE1 and FILE2\n\
    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER\n\
      LETTERs are as follows for new group, lower case for old group:\n\
        F  first line number\n\
        L  last line number\n\
        N  number of lines = L-F+1\n\
        E  F-1\n\
        M  L+1"),
  N_("  LFMT may contain:\n\
    %L  contents of line\n\
    %l  contents of line, excluding any trailing newline\n\
    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number"),
  N_("  Either GFMT or LFMT may contain:\n\
    %%  %\n\
    %c'C'  the single character C\n\
    %c'\\OOO'  the character with octal code OOO"),
  "",
  N_("-l  --paginate  Pass the output through `pr' to paginate it."),
  N_("-t  --expand-tabs  Expand tabs to spaces in output."),
  N_("-T  --initial-tab  Make tabs line up by prepending a tab."),
  "",
  N_("-r  --recursive  Recursively compare any subdirectories found."),
  N_("-N  --new-file  Treat absent files as empty."),
  N_("--unidirectional-new-file  Treat absent first files as empty."),
  N_("-s  --report-identical-files  Report when two files are the same."),
  N_("-x PAT  --exclude=PAT  Exclude files that match PAT."),
  N_("-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE."),
  N_("-S FILE  --starting-file=FILE  Start with FILE when comparing directories."),
  N_("--from-file=FILE1  Compare FILE1 to all operands.  FILE1 can be a directory."),
  N_("--to-file=FILE2  Compare all operands to FILE2.  FILE2 can be a directory."),
  "",
  N_("--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix."),
  N_("-d  --minimal  Try hard to find a smaller set of changes."),
  N_("--speed-large-files  Assume large files and many scattered small changes."),
  "",
  N_("-v  --version  Output version info."),
  N_("--help  Output this help."),
  "",
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
917
918static void
919usage (void)
920{
921  char const * const *p;
922
923  printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
924
925  for (p = option_help_msgid;  *p;  p++)
926    {
927      if (!**p)
928	putchar ('\n');
929      else
930	{
931	  char const *msg = _(*p);
932	  char const *nl;
933	  while ((nl = strchr (msg, '\n')))
934	    {
935	      int msglen = nl + 1 - msg;
936	      printf ("  %.*s", msglen, msg);
937	      msg = nl + 1;
938	    }
939
940	  printf ("  %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
941	}
942    }
943}
944
/* Store VALUE in *VAR.  If *VAR already holds a different string,
   that is a conflict: report it, naming OPTION, and give up through
   try_help.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *current = *var;

  if (current != 0 && strcmp (current, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
957
958/* Set the output style to STYLE, diagnosing conflicts.  */
959static void
960specify_style (enum output_style style)
961{
962  if (output_style != style)
963    {
964      if (output_style != OUTPUT_UNSPECIFIED)
965	try_help ("conflicting output style options", 0);
966      output_style = style;
967    }
968}
969
/* Return the translated phrase naming the type of the file whose
   status is *ST, or "weird file" if no known type matches.  */
static char const *
filetype (struct stat const *st)
{
  /* The formats follow POSIX 1003.1-2001.

     Each phrase must start with a consonant, so that diagnostics
     built from it stay grammatical in English.  */

  mode_t const mode = st->st_mode;

  if (S_ISREG (mode))
    {
      if (st->st_size == 0)
	return _("regular empty file");
      return _("regular file");
    }

  if (S_ISDIR (mode))
    return _("directory");

#ifdef S_ISBLK
  if (S_ISBLK (mode))
    return _("block special file");
#endif
#ifdef S_ISCHR
  if (S_ISCHR (mode))
    return _("character special file");
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (mode))
    return _("fifo");
#endif
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (mode))
    return _("socket");
#endif

  /* The remaining tests take the whole status, not just the mode.  */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return _("message queue");
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return _("semaphore");
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return _("shared memory object");
#endif
#ifdef S_TYPEISTMO
  if (S_TYPEISTMO (st))
    return _("typed memory object");
#endif

  return _("weird file");
}
1011
/* Make the last-modified time recorded in *ST the current time.

   When the status carries a nanosecond field (ST_MTIM_NSEC), the
   higher-resolution clocks are tried first, in order: clock_gettime,
   then gettimeofday.  If neither is available or both fail, the time
   comes from `time', which sets only the seconds.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  {
    int rc = clock_gettime (CLOCK_REALTIME, &st->st_mtim);

    if (rc == 0)
      return;
  }
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
	/* Microseconds to nanoseconds.  */
	st->st_mtime = now.tv_sec;
	st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
	return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Fallback with one-second resolution.  */
  time (&st->st_mtime);
}
1040
1041/* Compare two files (or dirs) with parent comparison PARENT
1042   and names NAME0 and NAME1.
1043   (If PARENT is 0, then the first name is just NAME0, etc.)
1044   This is self-contained; it opens the files and closes them.
1045
1046   Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1047   different, EXIT_TROUBLE if there is a problem opening them.  */
1048
1049static int
1050compare_files (struct comparison const *parent,
1051	       char const *name0,
1052	       char const *name1)
1053{
1054  struct comparison cmp;
1055#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1056  register int f;
1057  int status = EXIT_SUCCESS;
1058  bool same_files;
1059  char *free0, *free1;
1060
1061  /* If this is directory comparison, perhaps we have a file
1062     that exists only in one of the directories.
1063     If so, just print a message to that effect.  */
1064
1065  if (! ((name0 && name1)
1066	 || (unidirectional_new_file && name1)
1067	 || new_file))
1068    {
1069      char const *name = name0 == 0 ? name1 : name0;
1070      char const *dir = parent->file[name0 == 0].name;
1071
1072      /* See POSIX 1003.1-2001 for this format.  */
1073      message ("Only in %s: %s\n", dir, name);
1074
1075      /* Return EXIT_FAILURE so that diff_dirs will return
1076	 EXIT_FAILURE ("some files differ").  */
1077      return EXIT_FAILURE;
1078    }
1079
1080  memset (cmp.file, 0, sizeof cmp.file);
1081  cmp.parent = parent;
1082
1083  /* cmp.file[f].desc markers */
1084#define NONEXISTENT (-1) /* nonexistent file */
1085#define UNOPENED (-2) /* unopened file (e.g. directory) */
1086#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1087
1088#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1089
1090  cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1091  cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1092
1093  /* Now record the full name of each file, including nonexistent ones.  */
1094
1095  if (name0 == 0)
1096    name0 = name1;
1097  if (name1 == 0)
1098    name1 = name0;
1099
1100  if (!parent)
1101    {
1102      free0 = 0;
1103      free1 = 0;
1104      cmp.file[0].name = name0;
1105      cmp.file[1].name = name1;
1106    }
1107  else
1108    {
1109      cmp.file[0].name = free0
1110	= dir_file_pathname (parent->file[0].name, name0);
1111      cmp.file[1].name = free1
1112	= dir_file_pathname (parent->file[1].name, name1);
1113    }
1114
1115  /* Stat the files.  */
1116
1117  for (f = 0; f < 2; f++)
1118    {
1119      if (cmp.file[f].desc != NONEXISTENT)
1120	{
1121	  if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1122	    {
1123	      cmp.file[f].desc = cmp.file[0].desc;
1124	      cmp.file[f].stat = cmp.file[0].stat;
1125	    }
1126	  else if (strcmp (cmp.file[f].name, "-") == 0)
1127	    {
1128	      cmp.file[f].desc = STDIN_FILENO;
1129	      if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1130		cmp.file[f].desc = ERRNO_ENCODE (errno);
1131	      else
1132		{
1133		  if (S_ISREG (cmp.file[f].stat.st_mode))
1134		    {
1135		      off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1136		      if (pos < 0)
1137			cmp.file[f].desc = ERRNO_ENCODE (errno);
1138		      else
1139			cmp.file[f].stat.st_size =
1140			  MAX (0, cmp.file[f].stat.st_size - pos);
1141		    }
1142
1143		  /* POSIX 1003.1-2001 requires current time for
1144		     stdin.  */
1145		  set_mtime_to_now (&cmp.file[f].stat);
1146		}
1147	    }
1148	  else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1149	    cmp.file[f].desc = ERRNO_ENCODE (errno);
1150	}
1151    }
1152
1153  /* Mark files as nonexistent at the top level as needed for -N and
1154     --unidirectional-new-file.  */
1155  if (! parent)
1156    {
1157      if ((new_file | unidirectional_new_file)
1158	  && cmp.file[0].desc == ERRNO_ENCODE (ENOENT)
1159	  && cmp.file[1].desc == UNOPENED)
1160	cmp.file[0].desc = NONEXISTENT;
1161
1162      if (new_file
1163	  && cmp.file[0].desc == UNOPENED
1164	  && cmp.file[1].desc == ERRNO_ENCODE (ENOENT))
1165	cmp.file[1].desc = NONEXISTENT;
1166    }
1167
1168  for (f = 0; f < 2; f++)
1169    if (cmp.file[f].desc == NONEXISTENT)
1170      cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1171
1172  for (f = 0; f < 2; f++)
1173    {
1174      int e = ERRNO_DECODE (cmp.file[f].desc);
1175      if (0 <= e)
1176	{
1177	  errno = e;
1178	  perror_with_name (cmp.file[f].name);
1179	  status = EXIT_TROUBLE;
1180	}
1181    }
1182
1183  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1184    {
1185      /* If one is a directory, and it was specified in the command line,
1186	 use the file in that dir with the other file's basename.  */
1187
1188      int fnm_arg = DIR_P (0);
1189      int dir_arg = 1 - fnm_arg;
1190      char const *fnm = cmp.file[fnm_arg].name;
1191      char const *dir = cmp.file[dir_arg].name;
1192      char const *filename = cmp.file[dir_arg].name = free0
1193	= dir_file_pathname (dir, base_name (fnm));
1194
1195      if (strcmp (fnm, "-") == 0)
1196	fatal ("cannot compare `-' to a directory");
1197
1198      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1199	{
1200	  perror_with_name (filename);
1201	  status = EXIT_TROUBLE;
1202	}
1203    }
1204
1205  if (status != EXIT_SUCCESS)
1206    {
1207      /* One of the files should exist but does not.  */
1208    }
1209  else if ((same_files
1210	    = (cmp.file[0].desc != NONEXISTENT
1211	       && cmp.file[1].desc != NONEXISTENT
1212	       && (same_special_file (&cmp.file[0].stat, &cmp.file[1].stat)
1213	           || (0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1214		       && same_file_attributes (&cmp.file[0].stat,
1215					&cmp.file[1].stat)))))
1216	   && no_diff_means_no_output)
1217    {
1218      /* The two named files are actually the same physical file.
1219	 We know they are identical without actually reading them.  */
1220    }
1221  else if (DIR_P (0) & DIR_P (1))
1222    {
1223      if (output_style == OUTPUT_IFDEF)
1224	fatal ("-D option not supported with directories");
1225
1226      /* If both are directories, compare the files in them.  */
1227
1228      if (parent && !recursive)
1229	{
1230	  /* But don't compare dir contents one level down
1231	     unless -r was specified.
1232	     See POSIX 1003.1-2001 for this format.  */
1233	  message ("Common subdirectories: %s and %s\n",
1234		   cmp.file[0].name, cmp.file[1].name);
1235	}
1236      else
1237	status = diff_dirs (&cmp, compare_files);
1238    }
1239  else if ((DIR_P (0) | DIR_P (1))
1240	   || (parent
1241	       && (! S_ISREG (cmp.file[0].stat.st_mode)
1242		   || ! S_ISREG (cmp.file[1].stat.st_mode))))
1243    {
1244      if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1245	{
1246	  /* We have a subdirectory that exists only in one directory.  */
1247
1248	  if ((DIR_P (0) | DIR_P (1))
1249	      && recursive
1250	      && (new_file
1251		  || (unidirectional_new_file
1252		      && cmp.file[0].desc == NONEXISTENT)))
1253	    status = diff_dirs (&cmp, compare_files);
1254	  else
1255	    {
1256	      char const *dir
1257		= parent->file[cmp.file[0].desc == NONEXISTENT].name;
1258
1259	      /* See POSIX 1003.1-2001 for this format.  */
1260	      message ("Only in %s: %s\n", dir, name0);
1261
1262	      status = EXIT_FAILURE;
1263	    }
1264	}
1265      else
1266	{
1267	  /* We have two files that are not to be compared.  */
1268
1269	  /* See POSIX 1003.1-2001 for this format.  */
1270	  message5 ("File %s is a %s while file %s is a %s\n",
1271		    file_label[0] ? file_label[0] : cmp.file[0].name,
1272		    filetype (&cmp.file[0].stat),
1273		    file_label[1] ? file_label[1] : cmp.file[1].name,
1274		    filetype (&cmp.file[1].stat));
1275
1276	  /* This is a difference.  */
1277	  status = EXIT_FAILURE;
1278	}
1279    }
1280  else if (files_can_be_treated_as_binary
1281	   && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
1282	   && (cmp.file[0].desc == NONEXISTENT
1283	       || S_ISREG (cmp.file[0].stat.st_mode))
1284	   && (cmp.file[1].desc == NONEXISTENT
1285	       || S_ISREG (cmp.file[1].stat.st_mode)))
1286    {
1287      message ("Files %s and %s differ\n",
1288	       file_label[0] ? file_label[0] : cmp.file[0].name,
1289	       file_label[1] ? file_label[1] : cmp.file[1].name);
1290      status = EXIT_FAILURE;
1291    }
1292  else
1293    {
1294      /* Both exist and neither is a directory.  */
1295
1296      /* Open the files and record their descriptors.  */
1297
1298      if (cmp.file[0].desc == UNOPENED)
1299	if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1300	  {
1301	    perror_with_name (cmp.file[0].name);
1302	    status = EXIT_TROUBLE;
1303	  }
1304      if (cmp.file[1].desc == UNOPENED)
1305	{
1306	  if (same_files)
1307	    cmp.file[1].desc = cmp.file[0].desc;
1308	  else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1309		   < 0)
1310	    {
1311	      perror_with_name (cmp.file[1].name);
1312	      status = EXIT_TROUBLE;
1313	    }
1314	}
1315
1316#if HAVE_SETMODE_DOS
1317      if (binary)
1318	for (f = 0; f < 2; f++)
1319	  if (0 <= cmp.file[f].desc)
1320	    set_binary_mode (cmp.file[f].desc, 1);
1321#endif
1322
1323      /* Compare the files, if no error was found.  */
1324
1325      if (status == EXIT_SUCCESS)
1326	status = diff_2_files (&cmp);
1327
1328      /* Close the file descriptors.  */
1329
1330      if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1331	{
1332	  perror_with_name (cmp.file[0].name);
1333	  status = EXIT_TROUBLE;
1334	}
1335      if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1336	  && close (cmp.file[1].desc) != 0)
1337	{
1338	  perror_with_name (cmp.file[1].name);
1339	  status = EXIT_TROUBLE;
1340	}
1341    }
1342
1343  /* Now the comparison has been done, if no error prevented it,
1344     and STATUS is the value this function will return.  */
1345
1346  if (status == EXIT_SUCCESS)
1347    {
1348      if (report_identical_files && !DIR_P (0))
1349	message ("Files %s and %s are identical\n",
1350		 file_label[0] ? file_label[0] : cmp.file[0].name,
1351		 file_label[1] ? file_label[1] : cmp.file[1].name);
1352    }
1353  else
1354    {
1355      /* Flush stdout so that the user sees differences immediately.
1356	 This can hurt performance, unfortunately.  */
1357      if (fflush (stdout) != 0)
1358	pfatal_with_name (_("standard output"));
1359    }
1360
1361  if (free0)
1362    free (free0);
1363  if (free1)
1364    free (free1);
1365
1366  return status;
1367}
1368