1/******************************************************************************
2 * Copyright 1994-2010,2012 by Thomas E. Dickey                               *
3 * All Rights Reserved.                                                       *
4 *                                                                            *
5 * Permission to use, copy, modify, and distribute this software and its      *
6 * documentation for any purpose and without fee is hereby granted, provided  *
7 * that the above copyright notice appear in all copies and that both that    *
8 * copyright notice and this permission notice appear in supporting           *
9 * documentation, and that the name of the above listed copyright holder(s)   *
10 * not be used in advertising or publicity pertaining to distribution of the  *
11 * software without specific, written prior permission.                       *
12 *                                                                            *
13 * THE ABOVE LISTED COPYRIGHT HOLDER(S) DISCLAIM ALL WARRANTIES WITH REGARD   *
14 * TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND  *
15 * FITNESS, IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE  *
16 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES          *
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN      *
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR *
19 * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.                *
20 ******************************************************************************/
21
#ifndef	NO_IDENT
/*
 * RCS keyword string recording the revision of this file.  It is omitted
 * when NO_IDENT is defined.
 */
static const char *Id = "$Id: diffstat.c,v 1.55 2012/01/03 09:44:24 tom Exp $";
#endif
25
26/*
27 * Title:	diffstat.c
28 * Author:	T.E.Dickey
29 * Created:	02 Feb 1992
30 * Modified:
31 *		03 Jan 2012, Correct case for "xz" suffix in is_compressed()
32 *			     (patch from Frederic Culot in FreeBSD ports).  Add
33 *			     "-R" option.  Improve dequoting of filenames in
34 *			     headers.
35 *		10 Oct 2010, correct display of new files when -S/-D options
36 *			     are used.  Remove the temporary directory on
37 *			     error, introduced in 1.48+ (patch by Solar
38 *			     Designer).
39 *		19 Jul 2010, add missing "break" statement which left "-c"
40 *			     option falling-through into "-C".
41 *		16 Jul 2010, configure "xz" path explicitly, in case lzcat
42 *			     does not support xz format.  Add "-s" (summary)
43 *			     and "-C" (color) options.
44 *		15 Jul 2010, fix strict gcc warnings, e.g., using const.
45 *		10 Jan 2010, improve a case where filenames have embedded blanks
46 *			     (patch by Reinier Post).
47 *		07 Nov 2009, correct suffix-check for ".xz" files as
48 *			     command-line parameters rather than as piped
49 *			     input (report by Moritz Barsnick).
50 *		06 Oct 2009, fixes to build/run with MSYS or MinGW.  use
51 *			     $TMPDIR for path of temporary file used in
52 *			     decompression.  correct else-condition for
53 *			     detecting compression type (patch by Zach Hirsch).
54 *		31 Aug 2009, improve lzma support, add support for xz (patch by
55 *			     Eric Blake).  Add special case for no-newline
56 *			     message from some diff's (Ubuntu #269895).
57 *			     Improve configure check for getopt().
58 *		11 Aug 2009, Add logic to check standard input, decompress if
59 *			     possible.  Add -N option, to truncate long names.
60 *			     Add pack/pcat as a compression type.
61 *			     Add lzma/lzcat as a compression type.
62 *			     Allow overriding program paths with environment.
63 *		10 Aug 2009, modify to work with Perforce-style diffs (patch
64 *			     by Ed Schouten).
65 *		29 Mar 2009, modify to work with patch ".rej" files, which have
66 *			     no filename header (use the name of the ".rej"
67 *			     file if it is available).
68 *		29 Sep 2008, fix typo in usage message.
69 *		06 Aug 2008, add "-m", "-S" and "-D" options.
70 *		05 Aug 2008, add "-q" option to suppress 0-files-changed
71 *			     message (patch by Greg Norris).
72 *		04 Sep 2007, add "-b" option to suppress binary-files (patch
73 *			     by Greg Norris).
74 *		26 Aug 2007, add "-d" option to show debugging traces, rather
75 *			     than by defining DEBUG.  Add check after
76 *			     unified-diff chunk to avoid adding non-diff text
77 *			     (report by Adrian Bunk).  Quote pathname passed
78 *			     in command to gzip/uncompress.  Add a check for
79 *			     default-diff output without the "diff" command
80 *			     supplied to provide filename, mark as "unknown".
 *		16 Jul 2006, fix to avoid modifying data which is being used by
 *			     tsearch() for ordering the binary tree (report by
 *			     Adrian Bunk).
84 *		02 Jul 2006, do not ignore pathnames in /tmp/, since some tools
85 *			     create usable pathnames for both old/new files
86 *			     there (Debian #376086).  Correct ifdef for
87 *			     fgetc_unlocked().  Add configure check for
88 *			     compress, gzip and bzip2 programs that may be used
89 *			     to decompress files.
90 *		24 Aug 2005, update usage message for -l, -r changes.
91 *		15 Aug 2005, apply PLURAL() to num_files (Jean Delvare).
92 *			     add -l option (request by Michael Burian).
93 *			     Use fgetc_locked() if available.
94 *		14 Aug 2005, add -r2 option (rounding with adjustment to ensure
95 *			     that nonzero values always display a histogram
96 *			     bar), adapted from patch by Jean Delvare.  Extend
97 *			     the -f option (2=filled, 4=verbose).
98 *		12 Aug 2005, modify to use tsearch() for sorted lists.
99 *		11 Aug 2005, minor fixes to scaling of modified lines.  Add
100 *			     -r (round) option.
101 *		05 Aug 2005, add -t (table) option.
102 *		10 Apr 2005, change order of merging and prefix-stripping so
103 *			     stripping all prefixes, e.g., with -p9, will be
104 *			     sorted as expected (Patch by Jean Delvare
105 *			     <khali@linux-fr.org>).
106 *		10 Jan 2005, add support for '--help' and '--version' (Patch
107 *			     by Eric Blake <ebb9@byu.net>.)
108 *		16 Dec 2004, fix a different case for data beginning with "--"
109 *			     which was treated as a header line.
110 *		14 Dec 2004, Fix allocation problems.  Open files in binary
111 *			     mode for reading.  Getopt returns -1, not
112 *			     necessarily EOF.  Add const where useful.  Use
113 *			     NO_IDENT where necessary.  malloc() comes from
114 *			     <stdlib.h> in standard systems (Patch by Eric
115 *			     Blake <ebb9@byu.net>.)
116 *		08 Nov 2004, minor fix for resync of unified diffs checks for
117 *			     range (line beginning with '@' without header
118 *			     lines (successive lines beginning with "---" and
 *			     "+++")).  Fix a few problems reported by valgrind.
120 *		09 Nov 2003, modify check for lines beginning with '-' or '+'
121 *			     to treat only "---" in old-style diffs as a
122 *			     special case.
123 *		14 Feb 2003, modify check for filenames to allow for some cases
124 *			     of incomplete dates (the reported example omitted
125 *			     the day of the month).  Correct a typo in usage().
126 *			     Add -e, -h, -o options.
127 *		04 Jan 2003, improve tracking of chunks in unified diff, in
128 *			     case the original files contained a '+' or '-' in
129 *			     the first column (Debian #155000).  Add -v option
130 *			     (Debian #170947).  Modify to allocate buffers big
131 *			     enough for long input lines.  Do additional
132 *			     merging to handle unusual Index/diff constructs in
133 *			     recent makepatch script.
134 *		20 Aug 2002, add -u option to tell diffstat to preserve the
135 *			     order of filenames as given rather than sort them
136 *			     (request by H Peter Anvin <hpa@zytor.com>).  Add
137 *			     -k option for completeness.
138 *		09 Aug 2002, allow either '/' or '-' as delimiters in dates,
139 *			     to accommodate diffutils 2.8 (report by Rik van
140 *			     Riel <riel@conectiva.com.br>).
141 *		10 Oct 2001, add bzip2 (.bz2) suffix as suggested by
142 *			     Gregory T Norris <haphazard@socket.net> in Debian
143 *			     bug report #82969).
144 *			     add check for diff from RCS archive where the
145 *			     "diff" lines do not reference a filename.
146 *		29 Mar 2000, add -c option.  Check for compressed input, read
147 *			     via pipe.  Change to ANSI C.  Adapted change from
148 *			     Troy Engel to add option that displays a number
149 *			     only, rather than a histogram.
150 *		17 May 1998, handle Debian diff files, which do not contain
151 *			     dates on the header lines.
152 *		16 Jan 1998, accommodate patches w/o tabs in header lines (e.g.,
153 *			     from cut/paste).  Strip suffixes such as ".orig".
154 *		24 Mar 1996, corrected -p0 logic, more fixes in do_merging.
155 *		16 Mar 1996, corrected state-change for "Binary".  Added -p
156 *			     option.
157 *		17 Dec 1995, corrected matching algorithm in 'do_merging()'
158 *		11 Dec 1995, mods to accommodate diffs against /dev/null or
159 *			     /tmp/XXX (tempfiles).
160 *		06 May 1995, limit scaling -- only shrink-to-fit.
161 *		29 Apr 1995, recognize 'rcsdiff -u' format.
162 *		26 Dec 1994, strip common pathname-prefix.
163 *		13 Nov 1994, added '-n' option.  Corrected logic of 'match'.
164 *		17 Jun 1994, ifdef-<string.h>
165 *		12 Jun 1994, recognize unified diff, and output of makepatch.
166 *		04 Oct 1993, merge multiple diff-files, busy message when the
167 *			     output is piped to a file.
168 *
169 * Function:	this program reads the output of 'diff' and displays a histogram
170 *		of the insertions/deletions/modifications per-file.
171 */
172
173#if defined(HAVE_CONFIG_H)
174#include <config.h>
175#endif
176
177#if defined(WIN32) && !defined(HAVE_CONFIG_H)
178#define HAVE_STDLIB_H
179#define HAVE_STRING_H
180#define HAVE_MALLOC_H
181#define HAVE_GETOPT_H
182#endif
183
184#include <stdio.h>
185#include <ctype.h>
186
187#ifdef HAVE_STRING_H
188#include <string.h>
189#else
190#include <strings.h>
191#define strchr index
192#define strrchr rindex
193#endif
194
195#ifdef HAVE_STDLIB_H
196#include <stdlib.h>
197#else
198extern int atoi(const char *);
199#endif
200
201#ifdef HAVE_UNISTD_H
202#include <unistd.h>
203#else
204extern int isatty(int);
205#endif
206
207#ifdef HAVE_MALLOC_H
208#include <malloc.h>
209#endif
210
211#if defined(HAVE_SEARCH_H) && defined(HAVE_TSEARCH)
212#include <search.h>
213#else
214#undef HAVE_TSEARCH
215#endif
216
217#ifdef HAVE_GETC_UNLOCKED
218#define MY_GETC getc_unlocked
219#else
220#define MY_GETC getc
221#endif
222
223#ifdef HAVE_GETOPT_H
224#include <getopt.h>
225#elif !defined(HAVE_GETOPT_HEADER)
226extern int getopt(int, char *const *, const char *);
227extern char *optarg;
228extern int optind;
229#endif
230
231#include <sys/types.h>
232#include <sys/stat.h>
233
234#if !defined(EXIT_SUCCESS)
235#define EXIT_SUCCESS 0
236#define EXIT_FAILURE 1
237#endif
238
/*
 * Pathnames of the external programs which may be used to decompress input.
 * Each one defaults to an empty string if the build configuration did not
 * provide a value.
 */
#ifndef BZCAT_PATH
#define BZCAT_PATH ""
#endif

#ifndef BZIP2_PATH
#define BZIP2_PATH ""
#endif

#ifndef COMPRESS_PATH
#define COMPRESS_PATH ""
#endif

#ifndef GZIP_PATH
#define GZIP_PATH ""
#endif

#ifndef LZCAT_PATH
#define LZCAT_PATH ""
#endif

#ifndef PCAT_PATH
#define PCAT_PATH ""
#endif

#ifndef UNCOMPRESS_PATH
#define UNCOMPRESS_PATH ""
#endif

#ifndef XZ_PATH
#define XZ_PATH ""
#endif

#ifndef ZCAT_PATH
#define ZCAT_PATH ""
#endif
274
275/******************************************************************************/
276
/*
 * MKDIR hides the difference in mkdir()'s parameter list: with MinGW or
 * WIN32 only the name is passed, and the mode is ignored.
 */
#if defined(__MINGW32__) || defined(WIN32)
#define MKDIR(name,mode) mkdir(name)
#else
#define MKDIR(name,mode) mkdir(name,mode)
#endif

/*
 * The character which separates the levels of a pathname.  A backslash is
 * used only for WIN32 without MinGW.
 */
#if defined(WIN32) && !defined(__MINGW32__)
#define PATHSEP '\\'
#else
#define PATHSEP '/'
#endif

/* character constants used when parsing lines */
#define SQUOTE  '\''
#define EOS     '\0'
#define BLANK   ' '

/* convert a char to unsigned char, e.g., before passing it to <ctype.h> */
#define UC(c)   ((unsigned char)(c))
294
/* tracing is compiled-in unless OPT_TRACE is predefined as zero */
#ifndef OPT_TRACE
#define OPT_TRACE 1
#endif

/*
 * TRACE((format, ...)) writes a debugging message with printf() when
 * trace_opt is nonzero.  The parameter is a complete, parenthesized printf()
 * argument list.
 *
 * The expansion is wrapped in do/while(0) so that it acts as one statement.
 * With a bare "if", a TRACE() used as the unbraced body of an if/else would
 * capture the "else" which follows it.
 */
#if OPT_TRACE
#define TRACE(p) do { if (trace_opt) printf p; } while (0)
#else
#define TRACE(p)		/*nothing */
#endif
304
/* true if string 's' contains at least one of the characters in 'reject' */
#define contain_any(s,reject) (strcspn(s,reject) != strlen(s))

/*
 * Values telling what kind of filename has been seen.
 * NOTE(review): the nonzero values are distinct bits, presumably so that they
 * can be combined -- confirm against their use in do_file().
 */
#define HAVE_NOTHING 0
#define HAVE_GENERIC 1		/* e.g., "Index: foo" w/o pathname */
#define HAVE_PATH    2		/* reference-file from "diff dirname/foo" */
#define HAVE_PATH2   4		/* comparison-file from "diff dirname/foo" */

/* values for format_opt (the "-f" option, per the change-log above) */
#define FMT_CONCISE  0
#define FMT_NORMAL   1
#define FMT_FILLED   2
#define FMT_VERBOSE  4
316
/*
 * Classifies an entry.  Anything other than Normal is treated by HadDiffs()
 * as a difference, even if no lines were counted.
 * NOTE(review): Only/Binary presumably correspond to diff's "Only in" and
 * "Binary files" messages -- confirm in do_file().
 */
typedef enum comment {
    Normal, Only, Binary
} Comment;

#define MARKS 4			/* each of +, -, ! and = */

/* index into the chunk[] and count[] arrays of DATA, and into marks[] */
typedef enum {
    cInsert = 0,
    cDelete,
    cModify,
    cEquals
} Change;

#define InsOf(p) (p)->count[cInsert]	/* "+" count inserted lines */
#define DelOf(p) (p)->count[cDelete]	/* "-" count deleted lines */
#define ModOf(p) (p)->count[cModify]	/* "!" count modified lines */
#define EqlOf(p) (p)->count[cEquals]	/* "=" count unmodified lines */

/* sum of all four per-file counts */
#define TotalOf(p) (InsOf(p) + DelOf(p) + ModOf(p) + EqlOf(p))
/* loop over the marks in use, i.e., the first num_marks (3 or 4) of them */
#define for_each_mark(n) for (n = 0; n < num_marks; ++n)
337
/*
 * One entry per filename found in the input.  Entries are kept on a
 * singly-linked list starting at all_data.
 */
typedef struct _data {
    struct _data *link;		/* the next entry in the list, or null */
    char *name;			/* the filename */
    int copy;			/* true if filename is const-literal */
				/* (if set, delink() does not free 'name') */
    int base;			/* beginning of name if -p option used */
    Comment cmt;
    int pending;		/* incremented by update_chunk() when merging, */
				/* reset by finish_chunk() */
    long chunks;		/* total number of chunks */
    long chunk[MARKS];		/* counts for the current chunk */
    long count[MARKS];		/* counts for the file */
} DATA;
349
/*
 * The types of compression which are recognized; dcNone means that the input
 * is not compressed.
 * NOTE(review): dcEmpty presumably marks input with no content to examine --
 * confirm where it is assigned.
 */
typedef enum {
    dcNone = 0,
    dcBzip,
    dcCompress,
    dcGzip,
    dcLzma,
    dcPack,
    dcXz,
    dcEmpty
} Decompress;
360
/* the character shown for each Change value, indexed by that value */
static const char marks[MARKS + 1] = "+-!=";
/*
 * A number for each Change value, in the same order as marks[].
 * NOTE(review): presumably the SGR color index used with the "-C" option --
 * confirm where show_colors is tested.  Only MARKS values are given; the
 * extra element is implicitly zero.
 */
static const int colors[MARKS + 1] =
{2, 1, 6, 4};
364
static DATA *all_data;		/* head of the linked list of entries */
static const char *comment_opt = "";	/* NOTE(review): set outside this part of the file */
static char *path_opt = 0;	/* directory prepended to names in count_lines() */
static int format_opt = FMT_NORMAL;	/* holds FMT_xxx value(s) */
static int max_width;		/* the specified width-limit */
static int merge_names = 1;	/* true if we merge similar filenames */
static int merge_opt = 0;	/* true if we merge ins/del as modified */
static int min_name_wide;	/* minimum amount reserved for filenames */
static int max_name_wide;	/* maximum amount reserved for filenames */
static int names_only;		/* true if we list filenames only */
static int num_marks = 3;	/* 3 or 4, according to "-P" option */
static int reverse_opt;		/* true if results are reversed */
static int show_colors;		/* true if showing SGR colors */
static int show_progress;	/* if not writing to tty, show progress */
static int summary_only = 0;	/* true if only summary line is shown */
static int path_dest;		/* true if path_opt is destination (patched) */
static int plot_width;		/* the amount left over for histogram */
static int prefix_opt = -1;	/* if positive, controls stripping of PATHSEP */
static int round_opt = 0;	/* if nonzero, round data for histogram */
static int table_opt = 0;	/* if nonzero, write table rather than plot */
static int trace_opt = 0;	/* if nonzero, write debugging information */
static int sort_names = 1;	/* true if we sort filenames */
static int verbose = 0;		/* -v option */
static int quiet = 0;		/* -q option */
static int suppress_binary = 0;	/* -b option */
static long plot_scale;		/* the effective scale (1:maximum) */

#ifdef HAVE_TSEARCH
static int use_tsearch;		/* true if entries are also kept in sorted_data */
static void *sorted_data;	/* root of the tree used by tsearch(), etc. */
#endif

/*
 * Offset into each name, used by data_filename() when no "-p" option was
 * given (prefix_opt is negative).  It starts at -1, so it must be assigned a
 * usable value before data_filename() relies upon it.
 */
static int prefix_len = -1;
398
399/******************************************************************************/
400
/*
 * Report a fatal error: print 's' together with the system's description of
 * the current errno value using perror(), and exit with EXIT_FAILURE.  This
 * function does not return.
 */
static void
failed(const char *s)
{
    perror(s);
    exit(EXIT_FAILURE);
}
407
/*
 * Allocate 's' bytes.  If malloc() returns null, report that via failed(),
 * which exits; callers therefore never see a null result.
 */
static void *
xmalloc(size_t s)
{
    void *result = malloc(s);

    if (result == NULL) {
	failed("malloc");
    }
    return result;
}
417
418static int
419is_dir(const char *name)
420{
421    struct stat sb;
422    return (stat(name, &sb) == 0 &&
423	    (sb.st_mode & S_IFMT) == S_IFDIR);
424}
425
426static void
427blip(int c)
428{
429    if (show_progress) {
430	(void) fputc(c, stderr);
431	(void) fflush(stderr);
432    }
433}
434
/*
 * Return a newly allocated copy of the string 's'.  The allocation is done
 * with xmalloc(), so the result is never null.
 */
static char *
new_string(const char *s)
{
    size_t size = (size_t) (strlen(s) + 1);
    char *copy = (char *) xmalloc(size);

    return strcpy(copy, s);
}
440
441static int
442compare_data(const void *a, const void *b)
443{
444    const DATA *p = (const DATA *) a;
445    const DATA *q = (const DATA *) b;
446    return strcmp(p->name + p->base, q->name + q->base);
447}
448
449static void
450init_data(DATA * data, const char *name, int copy, int base)
451{
452    memset(data, 0, sizeof(*data));
453    data->name = (char *) name;
454    data->copy = copy;
455    data->base = base;
456    data->cmt = Normal;
457}
458
459static DATA *
460new_data(const char *name, int base)
461{
462    DATA *r = (DATA *) xmalloc(sizeof(DATA));
463
464    init_data(r, new_string(name), 0, base);
465
466    return r;
467}
468
#ifdef HAVE_TSEARCH
/*
 * Look up 'name' (compared starting at 'base') in the tree rooted at
 * sorted_data.  If it is found, return the existing item.  Otherwise create
 * a new item, add it to the tree and to the front of the all_data list, and
 * return that.
 *
 * If tsearch() cannot add the item (it returns null when it is unable to
 * allocate a node), exit via failed() rather than leave an item which is on
 * the list but which cannot be found in the tree.
 */
static DATA *
add_tsearch_data(const char *name, int base)
{
    DATA find;
    DATA *result;
    void *pp;

    /* a temporary key, which refers to the caller's name without copying */
    init_data(&find, name, 1, base);
    if ((pp = tfind(&find, &sorted_data, compare_data)) != 0) {
	result = *(DATA **) pp;
	return result;
    }
    result = new_data(name, base);
    if (tsearch(result, &sorted_data, compare_data) == 0)
	failed("tsearch");
    result->link = all_data;
    all_data = result;

    return result;
}
#endif
490
491static DATA *
492find_data(const char *name)
493{
494    DATA *p, *q, *r;
495    DATA find;
496    int base = 0;
497
498    TRACE(("** find_data(%s)\n", name));
499
500    /* Compute the base offset if the prefix option is used */
501    if (prefix_opt >= 0) {
502	int n;
503
504	for (n = prefix_opt; n > 0; n--) {
505	    char *s = strchr(name + base, PATHSEP);
506	    if (s == 0 || *++s == EOS)
507		break;
508	    base = (int) (s - name);
509	}
510	TRACE(("** base set to %d\n", base));
511    }
512
513    /* Insert into sorted list (usually sorted).  If we are not sorting or
514     * merging names, we fall off the end and link the new entry to the end of
515     * the list.  If the prefix option is used, the prefix is ignored by the
516     * merge and sort operations.
517     *
518     * If we have tsearch(), we will maintain the sorted list using it and
519     * tfind().
520     */
521#ifdef HAVE_TSEARCH
522    if (use_tsearch) {
523	r = add_tsearch_data(name, base);
524    } else
525#endif
526    {
527	init_data(&find, name, 1, base);
528	for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
529	    int cmp = compare_data(p, &find);
530	    if (merge_names && (cmp == 0))
531		return p;
532	    if (sort_names && (cmp > 0))
533		break;
534	}
535	r = new_data(name, base);
536	if (q != 0)
537	    q->link = r;
538	else
539	    all_data = r;
540
541	r->link = p;
542    }
543
544    return r;
545}
546
547/*
548 * Remove a unneeded data item from the linked list.  Free the name as well.
549 */
550static int
551delink(DATA * data)
552{
553    DATA *p, *q;
554
555    TRACE(("** delink '%s'\n", data->name));
556
557#ifdef HAVE_TSEARCH
558    if (use_tsearch) {
559	if (tdelete(data, &sorted_data, compare_data) == 0)
560	    return 0;
561    }
562#endif
563    for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
564	if (p == data) {
565	    if (q != 0)
566		q->link = p->link;
567	    else
568		all_data = p->link;
569	    if (!p->copy)
570		free(p->name);
571	    free(p);
572	    return 1;
573	}
574    }
575    return 0;
576}
577
578/*
579 * Compare string 's' against a constant, returning either a pointer just
580 * past the matched part of 's' if it matches exactly, or null if a mismatch
581 * was found.
582 */
583static char *
584match(char *s, const char *p)
585{
586    int ok = 0;
587
588    while (*s != EOS) {
589	if (*p == EOS) {
590	    ok = 1;
591	    break;
592	}
593	if (*s++ != *p++)
594	    break;
595	if (*s == EOS && *p == EOS) {
596	    ok = 1;
597	    break;
598	}
599    }
600    return ok ? s : 0;
601}
602
/*
 * Return true if 's' looks like a version number, i.e., two decimal integers
 * separated by a period, with nothing following the second number.
 */
static int
version_num(const char *s)
{
    int major_part, minor_part;
    char extra[2];
    int fields = sscanf(s, "%d.%d%c", &major_part, &minor_part, extra);

    /* a third field would mean that something followed the number */
    return (fields == 2);
}
610
/*
 * Check for a range of line-numbers, used in editing scripts.  That is either
 * a pair of numbers separated by a comma, or a single number.  In either
 * case, nothing may follow the number(s).
 */
static int
edit_range(const char *s)
{
    int first, last;
    char extra[2];

    if (sscanf(s, "%d,%d%c", &first, &last, extra) == 2)
	return 1;
    if (sscanf(s, "%d%c", &first, extra) == 1)
	return 1;
    return 0;
}
622
623/*
624 * Decode a range for default diff.
625 */
626static int
627decode_default(char *s,
628	       long *first, long *first_size,
629	       long *second, long *second_size)
630{
631    int rc = 0;
632    char *next;
633
634    if (isdigit(UC(*s))) {
635	*first_size = 1;
636	*second_size = 1;
637
638	*first = strtol(s, &next, 10);
639	if (next != 0 && next != s) {
640	    if (*next == ',') {
641		s = ++next;
642		*first_size = strtol(s, &next, 10) + 1 - *first;
643	    }
644	}
645	if (next != 0 && next != s) {
646	    switch (*next++) {
647	    case 'a':
648	    case 'c':
649	    case 'd':
650		s = next;
651		*second = strtol(s, &next, 10);
652		if (next != 0 && next != s) {
653		    if (*next == ',') {
654			s = ++next;
655			*second_size = strtol(s, &next, 10) + 1 - *second;
656		    }
657		}
658		if (next != 0 && next != s && *next == EOS)
659		    rc = 1;
660		break;
661	    }
662	}
663    }
664    return rc;
665}
666
/*
 * Decode a range for unified diff.  Oddly, the comments in diffutils code
 * claim that both numbers are line-numbers.  However, inspection of the output
 * shows that the numbers are a line-number followed by a count.
 *
 * The string must begin with a digit, and contain either "number,number" or
 * a single number, with nothing following.  In the latter case, the second
 * value is set to the same as the first.  Returns true on success.
 */
static int
decode_range(const char *s, int *first, int *second)
{
    char check;

    if (!isdigit(UC(*s)))
	return 0;

    if (sscanf(s, "%d,%d%c", first, second, &check) == 2) {
	TRACE(("** decode_range #1 first=%d, second=%d\n", *first, *second));
	return 1;
    }
    if (sscanf(s, "%d%c", first, &check) == 1) {
	*second = *first;	/* diffutils 2.7 does this */
	TRACE(("** decode_range #2 first=%d, second=%d\n", *first, *second));
	return 1;
    }
    return 0;
}
690
691static int
692HadDiffs(const DATA * data)
693{
694    return InsOf(data) != 0
695	|| DelOf(data) != 0
696	|| ModOf(data) != 0
697	|| data->cmt != Normal;
698}
699
/*
 * If the given path is not one of the "ignore" paths, then return true.
 * The paths which are ignored are the empty string and "/dev/null".
 */
static int
can_be_merged(const char *path)
{
    if (strcmp(path, "") == 0)
	return 0;
    if (strcmp(path, "/dev/null") == 0)
	return 0;
    return 1;
}
712
713static int
714is_leaf(const char *theLeaf, const char *path)
715{
716    char *s;
717
718    if (strchr(theLeaf, PATHSEP) == 0
719	&& (s = strrchr(path, PATHSEP)) != 0
720	&& !strcmp(++s, theLeaf))
721	return 1;
722    return 0;
723}
724
725static char *
726trim_datapath(DATA ** datap, size_t length, int *localp)
727{
728    char *target = (*datap)->name;
729
730#ifdef HAVE_TSEARCH
731    /*
732     * If we are using tsearch(), make a local copy of the data
733     * so we can trim it without interfering with tsearch's
734     * notion of the ordering of data.  That will create some
735     * spurious empty data, so we add the changed() macro in a
736     * few places to skip over those.
737     */
738    if (use_tsearch) {
739	char *trim = new_string(target);
740	trim[length] = EOS;
741	*datap = add_tsearch_data(trim, (*datap)->base);
742	target = (*datap)->name;
743	free(trim);
744	*localp = 1;
745    } else
746#endif
747	target[length] = EOS;
748
749    return target;
750}
751
/*
 * Choose the filename to use for a pair of header lines.
 *
 * The 'data' parameter points to the first of two markers, while
 * 'path' is the pathname from the second marker.
 *
 * On the first call for
 * a given file, the 'data' parameter stores no differences.
 *
 * Normally 'data' holds the target and 'path' is the source; those are
 * exchanged if reverse_opt is set.  With reverse_opt, no item is trimmed or
 * delinked, and 'path' is not modified.
 *
 * Returns the name which was chosen.  That is one of: the source, a pointer
 * into the source (past a prefix which differs from the target), or the
 * target.  Otherwise 'path' may be truncated in-place, if it is the same as
 * data's name aside from a suffix.
 *
 * Sets '*freed' to the result from delink() if that was called to discard
 * 'data', and to zero otherwise.
 */
static char *
do_merging(DATA * data, char *path, int *freed)
{
    char *target = reverse_opt ? path : data->name;
    char *source = reverse_opt ? data->name : path;
    char *result = source;

    TRACE(("** do_merging(\"%s\",\"%s\") diffs:%d\n",
	   data->name, path, HadDiffs(data)));

    *freed = 0;
    if (!HadDiffs(data)) {

	/* nothing was counted for 'data', so it may be replaced */
	if (is_leaf(target, source)) {
	    TRACE(("** is_leaf: \"%s\" vs \"%s\"\n", target, source));
	    if (reverse_opt) {
		TRACE((".. no action @%d\n", __LINE__));
	    } else {
		*freed = delink(data);
	    }
	} else if (can_be_merged(target)
		   && can_be_merged(source)) {
	    size_t len1 = strlen(target);
	    size_t len2 = strlen(source);
	    size_t n;
	    int matched = 0;
	    int diff = 0;
	    int local = 0;	/* set if trim_datapath() replaced 'data' */

	    /*
	     * If the source/target differ only by some suffix, e.g., ".orig"
	     * or ".bak", strip that off.  The target may also be a
	     * temporary filename (which would not be merged since it has no
	     * apparent relationship to the current).
	     */
	    if (len1 > len2) {
		if (!strncmp(target, source, len2)) {
		    TRACE(("** trimming data \"%s\" to \"%.*s\"\n",
			   target, (int) len2, target));
		    if (reverse_opt) {
			TRACE((".. no action @%d\n", __LINE__));
		    } else {
			target = trim_datapath(&data, len1 = len2, &local);
		    }
		}
	    } else if (len1 < len2) {
		if (!strncmp(target, source, len1)) {
		    TRACE(("** trimming source \"%s\" to \"%.*s\"\n",
			   source, (int) len1, source));
		    if (reverse_opt) {
			TRACE((".. no action @%d\n", __LINE__));
		    } else {
			source[len2 = len1] = EOS;
		    }
		}
	    }

	    /*
	     * If there was no "-p" option, look for the best match by
	     * stripping prefixes from both source/target strings.
	     */
	    if (prefix_opt < 0) {
		/*
		 * Now (whether or not we trimmed a suffix), scan back from the
		 * end of source/target strings to find if they happen to share
		 * a common ending, e.g., a/b/c versus d/b/c.  If the strings
		 * are not identical, then 'diff' will be set, but if they have
		 * a common ending then 'matched' will be set.
		 */
		for (n = 1; n <= len1 && n <= len2; n++) {
		    if (target[len1 - n] != source[len2 - n]) {
			diff = (int) n;
			break;
		    }
		    if (source[len2 - n] == PATHSEP) {
			matched = (int) n;
		    }
		}

		TRACE(("** merge @%d, prefix_opt=%d matched=%d diff=%d\n",
		       __LINE__, prefix_opt, matched, diff));
		if (matched != 0 && diff) {
		    if (reverse_opt) {
			TRACE((".. no action @%d\n", __LINE__));
		    } else {
			/* begin just past the separator which was matched */
			result = source + ((int) len2 - matched + 1);
		    }
		}
	    }

	    /* if trim_datapath() replaced 'data', do not discard that item */
	    if (!local) {
		if (reverse_opt) {
		    TRACE((".. no action @%d\n", __LINE__));
		} else {
		    *freed = delink(data);
		}
	    }
	} else if (reverse_opt) {
	    TRACE((".. no action @%d\n", __LINE__));
	    if (can_be_merged(source)) {
		TRACE(("** merge @%d\n", __LINE__));
	    } else {
		TRACE(("** do not merge, retain @%d\n", __LINE__));
		/* must not merge, retain existing name */
		result = target;
	    }
	} else {
	    if (can_be_merged(source)) {
		TRACE(("** merge @%d\n", __LINE__));
		*freed = delink(data);
	    } else {
		TRACE(("** do not merge, retain @%d\n", __LINE__));
		/* must not merge, retain existing name */
		result = target;
	    }
	}
    } else if (reverse_opt) {
	/* differences were already counted for 'data': it is never discarded */
	TRACE((".. no action @%d\n", __LINE__));
	if (can_be_merged(source)) {
	    TRACE(("** merge @%d\n", __LINE__));
	    result = target;
	} else {
	    TRACE(("** do not merge, retain @%d\n", __LINE__));
	}
    } else {
	if (can_be_merged(source)) {
	    TRACE(("** merge @%d\n", __LINE__));
	} else {
	    TRACE(("** do not merge, retain @%d\n", __LINE__));
	    result = target;
	}
    }
    TRACE(("** finish do_merging ->\"%s\"\n", result));
    return result;
}
894
895static int
896begin_data(const DATA * p)
897{
898    if (!can_be_merged(p->name)
899	&& strchr(p->name, PATHSEP) != 0) {
900	TRACE(("** begin_data:HAVE_PATH\n"));
901	return HAVE_PATH;
902    }
903    TRACE(("** begin_data:HAVE_GENERIC\n"));
904    return HAVE_GENERIC;
905}
906
/*
 * Return a pointer to the first character of 's' which is not whitespace.
 * That may be the terminating null.
 */
static char *
skip_blanks(char *s)
{
    for (; isspace(UC(*s)); ++s) {
	;
    }
    return s;
}
914
915/*
916 * Skip a filename, which may be in quotes, to allow embedded blanks in the
917 * name.
918 */
919static char *
920skip_filename(char *s)
921{
922    if (*s == SQUOTE && s[1] != EOS && strchr(s + 1, SQUOTE)) {
923	++s;
924	while (*s != EOS && (*s != SQUOTE) && isgraph(UC(*s))) {
925	    ++s;
926	}
927	++s;
928    } else {
929	while (*s != EOS && isgraph(UC(*s))) {
930	    ++s;
931	}
932    }
933    return s;
934}
935
936static char *
937skip_options(char *params)
938{
939    while (*params != EOS) {
940	params = skip_blanks(params);
941	if (*params == '-') {
942	    while (isgraph(UC(*params)))
943		params++;
944	} else {
945	    break;
946	}
947    }
948    return skip_blanks(params);
949}
950
951/*
952 * Strip single-quotes from a name (needed for recent makepatch versions).
953 */
954static void
955dequote(char *s)
956{
957    size_t len = strlen(s);
958    int n;
959
960    if (*s == SQUOTE && len > 2 && s[len - 1] == SQUOTE) {
961	for (n = 0; (s[n] = s[n + 1]) != EOS; ++n) {
962	    ;
963	}
964	s[len - 2] = EOS;
965    }
966}
967
/*
 * Allocate a fixed-buffer of 'want' bytes, storing its address in '*buffer'.
 * Any previous value of '*buffer' is overwritten without being freed.
 */
static void
fixed_buffer(char **buffer, size_t want)
{
    char *fresh = (char *) xmalloc(want);

    *buffer = fresh;
}
976
/*
 * Reallocate a fixed-buffer to 'want' bytes, updating '*buffer'.  If the
 * reallocation fails, exit via failed().
 */
static void
adjust_buffer(char **buffer, size_t want)
{
    char *resized = (char *) realloc(*buffer, want);

    *buffer = resized;
    if (resized == 0)
	failed("realloc");
}
986
987/*
988 * Read until newline or end-of-file, allocating the line-buffer so it is long
989 * enough for the input.
990 */
991static int
992get_line(char **buffer, size_t *have, FILE *fp)
993{
994    int ch;
995    size_t used = 0;
996
997    while ((ch = MY_GETC(fp)) != EOF) {
998	if (used + 2 > *have) {
999	    adjust_buffer(buffer, *have *= 2);
1000	}
1001	(*buffer)[used++] = (char) ch;
1002	if (ch == '\n')
1003	    break;
1004    }
1005    (*buffer)[used] = EOS;
1006    return (used != 0);
1007}
1008
1009static char *
1010data_filename(const DATA * p)
1011{
1012    return (p->name + (prefix_opt >= 0 ? p->base : prefix_len));
1013}
1014
1015/*
1016 * Count the (new)lines in a file, return -1 if the file is not found.
1017 */
1018static int
1019count_lines(DATA * p)
1020{
1021    int result = -1;
1022    char *filename = 0;
1023    char *filetail = data_filename(p);
1024    size_t want = strlen(path_opt) + 2 + strlen(filetail);
1025    FILE *fp;
1026    int ch;
1027
1028    if ((filename = malloc(want)) != 0) {
1029	int merge = 0;
1030
1031	if (path_dest) {
1032	    size_t path_len = strlen(path_opt);
1033	    size_t tail_len;
1034	    char *tail_sep = strchr(filetail, PATHSEP);
1035
1036	    if (tail_sep != 0) {
1037		tail_len = (size_t) (tail_sep - filetail);
1038		if (tail_len != 0 && tail_len <= path_len) {
1039		    if (tail_len < path_len
1040			&& path_opt[path_len - tail_len - 1] != PATHSEP) {
1041			merge = 0;
1042		    } else if (!strncmp(path_opt + path_len - tail_len,
1043					filetail,
1044					tail_len - 1)) {
1045			merge = 1;
1046			if (path_len > tail_len) {
1047			    sprintf(filename, "%.*s%c%s",
1048				    (int) (path_len - tail_len),
1049				    path_opt,
1050				    PATHSEP,
1051				    filetail);
1052			} else {
1053			    strcpy(filename, filetail);
1054			}
1055		    }
1056		}
1057	    }
1058	}
1059	if (!merge) {
1060	    sprintf(filename, "%s%c%s", path_opt, PATHSEP, filetail);
1061	}
1062
1063	TRACE(("count_lines %s\n", filename));
1064	if ((fp = fopen(filename, "r")) != 0) {
1065	    result = 0;
1066	    while ((ch = MY_GETC(fp)) != EOF) {
1067		if (ch == '\n')
1068		    ++result;
1069	    }
1070	    fclose(fp);
1071	} else {
1072	    fprintf(stderr, "Cannot open %s\n", filename);
1073	}
1074	free(filename);
1075    } else {
1076	failed("count_lines");
1077    }
1078    return result;
1079}
1080
1081static void
1082update_chunk(DATA * p, Change change)
1083{
1084    if (merge_opt) {
1085	p->pending += 1;
1086	p->chunk[change] += 1;
1087    } else {
1088	p->count[change] += 1;
1089    }
1090}
1091
1092static void
1093finish_chunk(DATA * p)
1094{
1095    int i;
1096
1097    if (p->pending) {
1098	p->pending = 0;
1099	p->chunks += 1;
1100	if (merge_opt) {
1101	    /*
1102	     * This is crude, but to make it really precise we would have
1103	     * to keep an array of line-numbers to which which in a chunk
1104	     * are marked as insert/delete.
1105	     */
1106	    if (p->chunk[cInsert] && p->chunk[cDelete]) {
1107		long change;
1108		if (p->chunk[cInsert] > p->chunk[cDelete]) {
1109		    change = p->chunk[cDelete];
1110		} else {
1111		    change = p->chunk[cInsert];
1112		}
1113		p->chunk[cInsert] -= change;
1114		p->chunk[cDelete] -= change;
1115		p->chunk[cModify] += change;
1116	    }
1117	}
1118	for_each_mark(i) {
1119	    p->count[i] += p->chunk[i];
1120	    p->chunk[i] = 0;
1121	}
1122    }
1123}
1124
/* True if both separators are '/' or both are '-', i.e. the two agree. */
#define date_delims(a,b) (((a)=='/' && (b)=='/') || ((a) == '-' && (b) == '-'))
/*
 * Trace which case of the switch in do_file() is handling the current line.
 * Expands in place, so it relies on do_file()'s locals: buffer, ok and that.
 */
#define CASE_TRACE() TRACE(("** handle case for '%c' %d:%s\n", *buffer, ok, that ? that->name : ""))
1127
1128static void
1129do_file(FILE *fp, const char *default_name)
1130{
1131    static const char *only_stars = "***************";
1132
1133    DATA dummy;
1134    DATA *that = &dummy;
1135    DATA *prev = 0;
1136    char *buffer = 0;
1137    char *b_fname = 0;
1138    char *b_temp1 = 0;
1139    char *b_temp2 = 0;
1140    char *b_temp3 = 0;
1141    size_t length = 0;
1142    size_t fixed = 0;
1143    int ok = HAVE_NOTHING;
1144    int marker;
1145    int freed = 0;
1146
1147    int unified = 0;
1148    int old_unify = 0;
1149    int new_unify = 0;
1150    int expect_unify = 0;
1151
1152    long old_dft = 0;
1153    long new_dft = 0;
1154
1155    int context = 1;
1156
1157    char *s;
1158#if OPT_TRACE
1159    int line_no = 0;
1160#endif
1161
1162    init_data(&dummy, "", 1, 0);
1163
1164    fixed_buffer(&buffer, fixed = length = BUFSIZ);
1165    fixed_buffer(&b_fname, length);
1166    fixed_buffer(&b_temp1, length);
1167    fixed_buffer(&b_temp2, length);
1168    fixed_buffer(&b_temp3, length);
1169
1170    while (get_line(&buffer, &length, fp)) {
1171	/*
1172	 * Adjust size of fixed-buffers so that a sscanf cannot overflow.
1173	 */
1174	if (length > fixed) {
1175	    fixed = length;
1176	    adjust_buffer(&b_fname, length);
1177	    adjust_buffer(&b_temp1, length);
1178	    adjust_buffer(&b_temp2, length);
1179	    adjust_buffer(&b_temp3, length);
1180	}
1181
1182	/*
1183	 * Trim trailing newline.
1184	 */
1185	for (s = buffer + strlen(buffer); s > buffer; s--) {
1186	    if ((UC(s[-1]) == '\n') || (UC(s[-1]) == '\r'))
1187		s[-1] = EOS;
1188	    else
1189		break;
1190	}
1191	++line_no;
1192	TRACE(("[%05d] %s\n", line_no, buffer));
1193
1194	/*
1195	 * "patch -U" can create ".rej" files lacking a filename header,
1196	 * in unified format.  Check for those.
1197	 */
1198	if (line_no == 1 && !strncmp(buffer, "@@", (size_t) 2)) {
1199	    unified = 2;
1200	    that = find_data(default_name);
1201	    ok = begin_data(that);
1202	}
1203
1204	/*
1205	 * The lines identifying files in a context diff depend on how it was
1206	 * invoked.  But after the header, each chunk begins with a line
1207	 * containing 15 *'s.  Each chunk may contain a line-range with '***'
1208	 * for the "before", and a line-range with '---' for the "after".  The
1209	 * part of the chunk depicting the deletion may be absent, though the
1210	 * edit line is present.
1211	 *
1212	 * The markers for unified diff are a little different from the normal
1213	 * context-diff.  Also, the edit-lines in a unified diff won't have a
1214	 * space in column 2.  Because of the missing space, we have to count
1215	 * lines to ensure we do not confuse the marker lines.
1216	 */
1217	marker = 0;
1218	if (that != &dummy && !strcmp(buffer, only_stars)) {
1219	    finish_chunk(that);
1220	    TRACE(("** begin context chunk\n"));
1221	    context = 2;
1222	} else if (line_no == 1 && !strcmp(buffer, only_stars)) {
1223	    TRACE(("** begin context chunk\n"));
1224	    context = 2;
1225	    that = find_data(default_name);
1226	    ok = begin_data(that);
1227	} else if (context == 2 && match(buffer, "*** ")) {
1228	    context = 1;
1229	} else if (context == 1 && match(buffer, "--- ")) {
1230	    marker = 1;
1231	    context = 0;
1232	} else if (match(buffer, "*** ")) {
1233	} else if ((old_unify + new_unify) == 0 && match(buffer, "==== ")) {
1234	    finish_chunk(that);
1235	    unified = 2;
1236	} else if ((old_unify + new_unify) == 0 && match(buffer, "--- ")) {
1237	    finish_chunk(that);
1238	    marker = unified = 1;
1239	} else if ((old_unify + new_unify) == 0 && match(buffer, "+++ ")) {
1240	    marker = unified = 2;
1241	} else if (unified == 2
1242		   || ((old_unify + new_unify) == 0 && (*buffer == '@'))) {
1243	    finish_chunk(that);
1244	    unified = 0;
1245	    if (*buffer == '@') {
1246		int old_base, new_base, old_size, new_size;
1247		char test_at;
1248
1249		old_unify = new_unify = 0;
1250		if (sscanf(buffer, "@@ -%[0-9,] +%[0-9,] @%c",
1251			   b_temp1,
1252			   b_temp2,
1253			   &test_at) == 3
1254		    && test_at == '@'
1255		    && decode_range(b_temp1, &old_base, &old_size)
1256		    && decode_range(b_temp2, &new_base, &new_size)) {
1257		    old_unify = old_size;
1258		    new_unify = new_size;
1259		    unified = -1;
1260		}
1261	    }
1262	} else if (unified == 1 && !context) {
1263	    /*
1264	     * If unified==1, we guessed we would find a "+++" line, but since
1265	     * we are here, we did not find that.  The context check ensures
1266	     * we do not mistake the "---" for a unified diff with that for
1267	     * a context diff's "after" line-range.
1268	     *
1269	     * If we guessed wrong, then we probably found a data line with
1270	     * "--" in the first two columns of the diff'd file.
1271	     */
1272	    unified = 0;
1273	    TRACE(("?? Expected \"+++\" for unified diff\n"));
1274	    if (prev != 0
1275		&& prev != that
1276		&& InsOf(that) == 0
1277		&& DelOf(that) == 0
1278		&& strcmp(prev->name, that->name)) {
1279		TRACE(("?? giveup on %ld/%ld %s\n", InsOf(that),
1280		       DelOf(that), that->name));
1281		TRACE(("?? revert to %ld/%ld %s\n", InsOf(prev),
1282		       DelOf(prev), prev->name));
1283		(void) delink(that);
1284		that = prev;
1285		update_chunk(that, cDelete);
1286	    }
1287	} else if (old_unify + new_unify) {
1288	    switch (*buffer) {
1289	    case '-':
1290		if (old_unify)
1291		    --old_unify;
1292		break;
1293	    case '+':
1294		if (new_unify)
1295		    --new_unify;
1296		break;
1297	    case EOS:
1298	    case ' ':
1299		if (old_unify)
1300		    --old_unify;
1301		if (new_unify)
1302		    --new_unify;
1303		break;
1304	    case '\\':
1305		if (strstr(buffer, "newline") != 0) {
1306		    break;
1307		}
1308		/* FALLTHRU */
1309	    default:
1310		TRACE(("?? expected more in chunk\n"));
1311		old_unify = new_unify = 0;
1312		break;
1313	    }
1314	    if (!(old_unify + new_unify)) {
1315		expect_unify = 2;
1316	    }
1317	} else {
1318	    long old_base, new_base;
1319
1320	    unified = 0;
1321
1322	    if (line_no == 1
1323		&& decode_default(buffer,
1324				  &old_base, &old_dft,
1325				  &new_base, &new_dft)) {
1326		TRACE(("DFT %ld,%ld -> %ld,%ld\n",
1327		       old_base, old_base + old_dft - 1,
1328		       new_base, new_base + new_dft - 1));
1329		finish_chunk(that);
1330		that = find_data("unknown");
1331		ok = begin_data(that);
1332	    }
1333	}
1334
1335	/*
1336	 * If the previous line ended a chunk of a unified diff, we may begin
1337	 * another chunk, or begin another type of diff.  If neither, do not
1338	 * continue to accumulate counts for the unified diff which has ended.
1339	 */
1340	if (expect_unify != 0) {
1341	    if (expect_unify-- == 1) {
1342		if (unified == 0) {
1343		    TRACE(("?? did not get chunk\n"));
1344		    finish_chunk(that);
1345		    that = &dummy;
1346		}
1347	    }
1348	}
1349
1350	/*
1351	 * Override the beginning of the line to simplify the case statement
1352	 * below.
1353	 */
1354	if (marker > 0) {
1355	    TRACE(("** have marker=%d, override %s\n", marker, buffer));
1356	    (void) strncpy(buffer, "***", (size_t) 3);
1357	}
1358
1359	/*
1360	 * Use the first character of the input line to determine its
1361	 * type:
1362	 */
1363	switch (*buffer) {
1364	case 'O':		/* Only */
1365	    CASE_TRACE();
1366	    if (match(buffer, "Only in ")) {
1367		char *path = buffer + 8;
1368		int found = 0;
1369		for (s = path; *s != EOS; s++) {
1370		    if (match(s, ": ")) {
1371			found = 1;
1372			*s++ = PATHSEP;
1373			while ((s[0] = s[1]) != EOS)
1374			    s++;
1375			break;
1376		    }
1377		}
1378		if (found) {
1379		    blip('.');
1380		    finish_chunk(that);
1381		    that = find_data(path);
1382		    that->cmt = Only;
1383		    ok = HAVE_NOTHING;
1384		}
1385	    }
1386	    break;
1387
1388	    /*
1389	     * Several different scripts produce "Index:" lines
1390	     * (e.g., "makepatch").  Not all bother to put the
1391	     * pathname of the files; some put only the leaf names.
1392	     */
1393	case 'I':
1394	    CASE_TRACE();
1395	    if ((s = match(buffer, "Index: ")) != 0) {
1396		s = skip_blanks(s);
1397		dequote(s);
1398		blip('.');
1399		finish_chunk(that);
1400		s = do_merging(that, s, &freed);
1401		that = find_data(s);
1402		ok = begin_data(that);
1403	    }
1404	    break;
1405
1406	case 'd':		/* diff command trace */
1407	    CASE_TRACE();
1408	    if ((s = match(buffer, "diff ")) != 0
1409		&& *(s = skip_options(s)) != EOS) {
1410		if (reverse_opt) {
1411		    *skip_filename(s) = EOS;
1412		} else {
1413		    s = skip_filename(s);
1414		    s = skip_blanks(s);
1415		}
1416		dequote(s);
1417		blip('.');
1418		finish_chunk(that);
1419		s = do_merging(that, s, &freed);
1420		that = find_data(s);
1421		ok = begin_data(that);
1422	    }
1423	    break;
1424
1425	case '*':
1426	    CASE_TRACE();
1427	    if (!(ok & HAVE_PATH)) {
1428		int ddd, hour, minute, second;
1429		int day, month, year;
1430		char yrmon, monday;
1431
1432		/* check for tab-delimited first, so we can
1433		 * accept filenames containing spaces.
1434		 */
1435		if (sscanf(buffer,
1436			   "*** %[^\t]\t%[^ ] %[^ ] %d %d:%d:%d %d",
1437			   b_fname,
1438			   b_temp2, b_temp3, &ddd,
1439			   &hour, &minute, &second, &year) == 8
1440		    || (sscanf(buffer,
1441			       "*** %[^\t]\t%d%c%d%c%d %d:%d:%d",
1442			       b_fname,
1443			       &year, &yrmon, &month, &monday, &day,
1444			       &hour, &minute, &second) == 9
1445			&& date_delims(yrmon, monday)
1446			&& !version_num(b_fname))
1447		    || sscanf(buffer,
1448			      "*** %[^\t ]%[\t ]%[^ ] %[^ ] %d %d:%d:%d %d",
1449			      b_fname,
1450			      b_temp1,
1451			      b_temp2, b_temp3, &ddd,
1452			      &hour, &minute, &second, &year) == 9
1453		    || (sscanf(buffer,
1454			       "*** %[^\t ]%[\t ]%d%c%d%c%d %d:%d:%d",
1455			       b_fname,
1456			       b_temp1,
1457			       &year, &yrmon, &month, &monday, &day,
1458			       &hour, &minute, &second) == 10
1459			&& date_delims(yrmon, monday)
1460			&& !version_num(b_fname))
1461		    || (sscanf(buffer,
1462			       "*** %[^\t ]%[\t ]",
1463			       b_fname,
1464			       b_temp1) >= 1
1465			&& !version_num(b_fname)
1466			&& !contain_any(b_fname, "*")
1467			&& !edit_range(b_fname))
1468		    ) {
1469		    prev = that;
1470		    finish_chunk(that);
1471		    s = do_merging(that, b_fname, &freed);
1472		    if (freed)
1473			prev = 0;
1474		    that = find_data(s);
1475		    ok = begin_data(that);
1476		    TRACE(("** after merge:%d:%s\n", ok, s));
1477		}
1478	    }
1479	    break;
1480
1481	case '=':
1482	    CASE_TRACE();
1483	    if (!(ok & HAVE_PATH)) {
1484		int rev;
1485
1486		if (((sscanf(buffer,
1487			     "==== %[^\t #]#%d - %[^\t ]",
1488			     b_fname,
1489			     &rev,
1490			     b_temp1) == 3)
1491		     || ((sscanf(buffer,
1492				 "==== %[^\t #]#%d (%[^)]) - %[^\t ]",
1493				 b_fname,
1494				 &rev,
1495				 b_temp1,
1496				 b_temp2) == 4)))
1497		    && !version_num(b_fname)
1498		    && !contain_any(b_fname, "*")
1499		    && !edit_range(b_fname)) {
1500		    TRACE(("** found p4-diff\n"));
1501		    prev = that;
1502		    finish_chunk(that);
1503		    s = do_merging(that, b_fname, &freed);
1504		    if (freed)
1505			prev = 0;
1506		    that = find_data(s);
1507		    ok = begin_data(that);
1508		    TRACE(("** after merge:%d:%s\n", ok, s));
1509		}
1510	    }
1511	    break;
1512
1513	case '+':
1514	    /* FALL-THRU */
1515	case '>':
1516	    CASE_TRACE();
1517	    if (ok) {
1518		update_chunk(that, cInsert);
1519	    }
1520	    break;
1521
1522	case '-':
1523	    if (!ok) {
1524		CASE_TRACE();
1525		break;
1526	    }
1527	    if (!unified && !strcmp(buffer, "---")) {
1528		CASE_TRACE();
1529		break;
1530	    }
1531	    /* fall-thru */
1532	case '<':
1533	    CASE_TRACE();
1534	    if (ok) {
1535		update_chunk(that, cDelete);
1536	    }
1537	    break;
1538
1539	case '!':
1540	    CASE_TRACE();
1541	    if (ok) {
1542		update_chunk(that, cModify);
1543	    }
1544	    break;
1545
1546	    /* Expecting "Binary files XXX and YYY differ" */
1547	case 'B':		/* Binary */
1548	    /* FALL-THRU */
1549	case 'b':		/* binary */
1550	    CASE_TRACE();
1551	    if ((s = match(buffer + 1, "inary files ")) != 0) {
1552		char *first = skip_blanks(s);
1553		/* blindly assume the first filename does not contain " and " */
1554		char *at_and = strstr(s, " and ");
1555		s = strrchr(buffer, BLANK);
1556		if ((at_and != NULL) && !strcmp(s, " differ")) {
1557		    char *second = skip_blanks(at_and + 5);
1558
1559		    if (reverse_opt) {
1560			*at_and = EOS;
1561			s = first;
1562		    } else {
1563			*s = EOS;
1564			s = second;
1565		    }
1566		    blip('.');
1567		    finish_chunk(that);
1568		    that = find_data(s);
1569		    that->cmt = Binary;
1570		    ok = HAVE_NOTHING;
1571		}
1572	    }
1573	    break;
1574	}
1575    }
1576    blip('\n');
1577
1578    finish_chunk(that);
1579    finish_chunk(&dummy);
1580    if (buffer != 0) {
1581	free(buffer);
1582	free(b_fname);
1583	free(b_temp1);
1584	free(b_temp2);
1585	free(b_temp3);
1586    }
1587}
1588
/*
 * Write an escape sequence to stdout: "\033[<color+30>m" for a non-negative
 * color, or "\033[0;39m" for a negative one (used by plot_bar() after a
 * colored bar).
 */
static void
show_color(int color)
{
    if (color < 0) {
	printf("\033[0;39m");
	return;
    }
    printf("\033[%dm", color + 30);
}
1597
1598static long
1599plot_bar(long count, int c, int color)
1600{
1601    long result = count;
1602
1603    if (show_colors && result != 0)
1604	show_color(color);
1605
1606    while (--count >= 0)
1607	(void) putchar(c);
1608
1609    if (show_colors && result != 0)
1610	show_color(-1);
1611
1612    return result;
1613}
1614
1615/*
1616 * Each call to 'plot_num()' prints a scaled bar of 'c' characters.  The
1617 * 'extra' parameter is used to keep the accumulated error in the bar's total
1618 * length from getting large.
1619 */
1620static long
1621plot_num(long num_value, int c, int color, long *extra)
1622{
1623    long product;
1624    long result = 0;
1625
1626    /* the value to plot */
1627    /* character to display in the bar */
1628    /* accumulated error in the bar */
1629    if (num_value) {
1630	product = (plot_width * num_value);
1631	result = ((product + *extra) / plot_scale);
1632	*extra = product - (result * plot_scale) - *extra;
1633	plot_bar(result, c, color);
1634    }
1635    return result;
1636}
1637
1638static long
1639plot_round1(const long num[MARKS])
1640{
1641    long result = 0;
1642    long scaled[MARKS];
1643    long remain[MARKS];
1644    long want = 0;
1645    long have = 0;
1646    long half = (plot_scale / 2);
1647    int i, j;
1648
1649    for_each_mark(i) {
1650	long product = (plot_width * num[i]);
1651	scaled[i] = (product / plot_scale);
1652	remain[i] = (product % plot_scale);
1653	want += product;
1654	have += product - remain[i];
1655    }
1656    while (want > have) {
1657	j = -1;
1658	for_each_mark(i) {
1659	    if (remain[i] != 0
1660		&& (remain[i] > (j >= 0 ? remain[j] : half))) {
1661		j = i;
1662	    }
1663	}
1664	if (j >= 0) {
1665	    have += remain[j];
1666	    remain[j] = 0;
1667	    scaled[j] += 1;
1668	} else {
1669	    break;
1670	}
1671    }
1672    for_each_mark(i) {
1673	plot_bar(scaled[i], marks[i], colors[i]);
1674	result += scaled[i];
1675    }
1676    return result;
1677}
1678
1679/*
1680 * Print a scaled bar of characters, where c[0] is for insertions, c[1]
1681 * for deletions and c[2] for modifications. The num array contains the
1682 * count for each type of change, in the same order.
1683 */
1684static long
1685plot_round2(const long num[MARKS])
1686{
1687    long result = 0;
1688    long scaled[MARKS];
1689    long remain[MARKS];
1690    long total = 0;
1691    int i;
1692
1693    for (i = 0; i < MARKS; i++)
1694	total += num[i];
1695
1696    if (total == 0)
1697	return result;
1698
1699    total = (total * plot_width + (plot_scale / 2)) / plot_scale;
1700    /* display at least one character */
1701    if (total == 0)
1702	total++;
1703
1704    for_each_mark(i) {
1705	scaled[i] = num[i] * plot_width / plot_scale;
1706	remain[i] = num[i] * plot_width - scaled[i] * plot_scale;
1707	total -= scaled[i];
1708    }
1709
1710    /* assign the missing chars using the largest remainder algo */
1711    while (total) {
1712	int largest, largest_count;	/* largest is a bit field */
1713	long max_remain;
1714
1715	/* search for the largest remainder */
1716	largest = largest_count = 0;
1717	max_remain = 0;
1718	for_each_mark(i) {
1719	    if (remain[i] > max_remain) {
1720		largest = 1 << i;
1721		largest_count = 1;
1722		max_remain = remain[i];
1723	    } else if (remain[i] == max_remain) {	/* ex aequo */
1724		largest |= 1 << i;
1725		largest_count++;
1726	    }
1727	}
1728
1729	/* if there are more greatest remainders than characters
1730	   missing, don't assign them at all */
1731	if (total < largest_count)
1732	    break;
1733
1734	/* allocate the extra characters */
1735	for_each_mark(i) {
1736	    if (largest & (1 << i)) {
1737		scaled[i]++;
1738		total--;
1739		remain[i] -= plot_width;
1740	    }
1741	}
1742    }
1743
1744    for_each_mark(i) {
1745	result += plot_bar(scaled[i], marks[i], colors[i]);
1746    }
1747
1748    return result;
1749}
1750
1751static void
1752plot_numbers(const DATA * p)
1753{
1754    long temp = 0;
1755    long used = 0;
1756    int i;
1757
1758    printf("%5ld ", TotalOf(p));
1759
1760    if (format_opt & FMT_VERBOSE) {
1761	printf("%5ld ", InsOf(p));
1762	printf("%5ld ", DelOf(p));
1763	printf("%5ld ", ModOf(p));
1764	if (path_opt)
1765	    printf("%5ld ", EqlOf(p));
1766    }
1767
1768    if (format_opt == FMT_CONCISE) {
1769	for_each_mark(i) {
1770	    printf("\t%ld %c", p->count[i], marks[i]);
1771	}
1772    } else {
1773	switch (round_opt) {
1774	default:
1775	    for_each_mark(i) {
1776		used += plot_num(p->count[i], marks[i], colors[i], &temp);
1777	    }
1778	    break;
1779	case 1:
1780	    used = plot_round1(p->count);
1781	    break;
1782
1783	case 2:
1784	    used = plot_round2(p->count);
1785	    break;
1786	}
1787
1788	if ((format_opt & FMT_FILLED) != 0) {
1789	    if (used > plot_width)
1790		printf("%ld", used - plot_width);	/* oops */
1791	    else
1792		plot_bar(plot_width - used, '.', 0);
1793	}
1794    }
1795}
1796
/*
 * True if the entry should be reported: always when merge_names is off,
 * otherwise only if it is not a Normal entry or has a nonzero total.
 */
#define changed(p) (!merge_names \
		    || (p)->cmt != Normal \
		    || (TotalOf(p)) != 0)
1800
1801static void
1802show_data(const DATA * p)
1803{
1804    char *name = data_filename(p);
1805    int width;
1806
1807    if (summary_only) {
1808	;
1809    } else if (!changed(p)) {
1810	;
1811    } else if (p->cmt == Binary && suppress_binary == 1) {
1812	;
1813    } else if (table_opt) {
1814	if (names_only) {
1815	    printf("%s\n", name);
1816	} else {
1817	    printf("%ld,%ld,%ld,",
1818		   InsOf(p),
1819		   DelOf(p),
1820		   ModOf(p));
1821	    if (path_opt)
1822		printf("%ld,", EqlOf(p));
1823	    printf("%s\n", name);
1824	}
1825    } else if (names_only) {
1826	printf("%s\n", name);
1827    } else {
1828	printf("%s ", comment_opt);
1829	if (max_name_wide > 0
1830	    && max_name_wide < min_name_wide
1831	    && max_name_wide < ((width = (int) strlen(name)))) {
1832	    printf("%.*s", max_name_wide, name + (width - max_name_wide));
1833	} else {
1834	    width = ((max_name_wide > 0 && max_name_wide < min_name_wide)
1835		     ? max_name_wide
1836		     : min_name_wide);
1837	    printf("%-*.*s", width, width, name);
1838	}
1839	putchar('|');
1840	switch (p->cmt) {
1841	default:
1842	case Normal:
1843	    plot_numbers(p);
1844	    break;
1845	case Binary:
1846	    printf("binary");
1847	    break;
1848	case Only:
1849	    printf("only");
1850	    break;
1851	}
1852	printf("\n");
1853    }
1854}
1855
#ifdef HAVE_TSEARCH
/*
 * Callback for twalk(): show the entry stored at a node when it is visited
 * as 'postorder' or 'leaf', so that each entry is shown once.
 */
static void
show_tsearch(const void *nodep, const VISIT which, const int depth)
{
    (void) depth;

    switch (which) {
    case postorder:
    case leaf:
	show_data(*(DATA * const *) nodep);
	break;
    default:
	break;
    }
}
#endif
1866
1867static int
1868ignore_data(DATA * p)
1869{
1870    return ((!changed(p))
1871	    || (p->cmt == Binary && suppress_binary));
1872}
1873
1874static void
1875summarize(void)
1876{
1877    DATA *p;
1878    long total_ins = 0;
1879    long total_del = 0;
1880    long total_mod = 0;
1881    long total_eql = 0;
1882    long temp;
1883    int num_files = 0, shortest_name = -1, longest_name = -1;
1884
1885    plot_scale = 0;
1886    for (p = all_data; p; p = p->link) {
1887	int len = (int) strlen(p->name);
1888
1889	if (ignore_data(p))
1890	    continue;
1891
1892	/*
1893	 * If "-pX" option is given, prefix_opt is positive.
1894	 *
1895	 * "-p0" gives the whole pathname unmodified.  "-p1" strips
1896	 * through the first path-separator, etc.
1897	 */
1898	if (prefix_opt >= 0) {
1899	    /* p->base has been computed at node creation */
1900	    if (min_name_wide < (len - p->base))
1901		min_name_wide = (len - p->base);
1902	} else {
1903	    /*
1904	     * If "-pX" option is not given, strip off any prefix which is
1905	     * shared by all of the names.
1906	     */
1907	    if (len < prefix_len || prefix_len < 0)
1908		prefix_len = len;
1909	    while (prefix_len > 0) {
1910		if (p->name[prefix_len - 1] != PATHSEP)
1911		    prefix_len--;
1912		else if (strncmp(all_data->name, p->name, (size_t) prefix_len))
1913		    prefix_len--;
1914		else
1915		    break;
1916	    }
1917
1918	    if (len > longest_name)
1919		longest_name = len;
1920	    if (len < shortest_name || shortest_name < 0)
1921		shortest_name = len;
1922	}
1923    }
1924
1925    /*
1926     * Use a separate loop after computing prefix_len so we can apply the "-S"
1927     * or "-D" options to find files that we can use as reference for the
1928     * unchanged-count.
1929     */
1930    for (p = all_data; p; p = p->link) {
1931	if (!ignore_data(p)) {
1932	    EqlOf(p) = 0;
1933	    if (reverse_opt) {
1934		int save_ins = InsOf(p);
1935		int save_del = DelOf(p);
1936		InsOf(p) = save_del;
1937		DelOf(p) = save_ins;
1938	    }
1939	    if (path_opt != 0) {
1940		int count = count_lines(p);
1941
1942		if (count >= 0) {
1943		    EqlOf(p) = count - ModOf(p);
1944		    if (path_dest != 0) {
1945			EqlOf(p) -= InsOf(p);
1946		    } else {
1947			EqlOf(p) -= DelOf(p);
1948		    }
1949		    if (EqlOf(p) < 0)
1950			EqlOf(p) = 0;
1951		}
1952	    }
1953	    num_files++;
1954	    total_ins += InsOf(p);
1955	    total_del += DelOf(p);
1956	    total_mod += ModOf(p);
1957	    total_eql += EqlOf(p);
1958	    temp = TotalOf(p);
1959	    if (temp > plot_scale)
1960		plot_scale = temp;
1961	}
1962    }
1963
1964    if (prefix_opt < 0) {
1965	if (prefix_len < 0)
1966	    prefix_len = 0;
1967	if ((longest_name - prefix_len) > min_name_wide)
1968	    min_name_wide = (longest_name - prefix_len);
1969    }
1970
1971    min_name_wide++;		/* make sure it's nonzero */
1972    plot_width = (max_width - min_name_wide - 8);
1973    if (plot_width < 10)
1974	plot_width = 10;
1975
1976    if (plot_scale < plot_width)
1977	plot_scale = plot_width;	/* 1:1 */
1978
1979    if (table_opt) {
1980	if (!names_only) {
1981	    printf("INSERTED,DELETED,MODIFIED,");
1982	    if (path_opt)
1983		printf("UNCHANGED,");
1984	}
1985	printf("FILENAME\n");
1986    }
1987#ifdef HAVE_TSEARCH
1988    if (use_tsearch) {
1989	twalk(sorted_data, show_tsearch);
1990    } else
1991#endif
1992	for (p = all_data; p; p = p->link) {
1993	    show_data(p);
1994	}
1995
1996    if (!table_opt && !names_only) {
1997#define PLURAL(n) n, n != 1 ? "s" : ""
1998	if (num_files > 0 || !quiet) {
1999	    printf("%s %d file%s changed", comment_opt, PLURAL(num_files));
2000	    if (total_ins)
2001		printf(", %ld insertion%s(+)", PLURAL(total_ins));
2002	    if (total_del)
2003		printf(", %ld deletion%s(-)", PLURAL(total_del));
2004	    if (total_mod)
2005		printf(", %ld modification%s(!)", PLURAL(total_mod));
2006	    if (total_eql && path_opt != 0)
2007		printf(", %ld unchanged line%s(=)", PLURAL(total_eql));
2008	    (void) putchar('\n');
2009	}
2010    }
2011}
2012
2013#ifdef HAVE_POPEN
2014static const char *
2015get_program(const char *name, const char *dft)
2016{
2017    const char *result = getenv(name);
2018    if (result == 0 || *result == EOS)
2019	result = dft;
2020    TRACE(("get_program(%s) = %s\n", name, result));
2021    return result;
2022}
2023#define GET_PROGRAM(name) get_program("DIFFSTAT_" #name, name)
2024
2025static char *
2026decompressor(Decompress which, const char *name)
2027{
2028    const char *verb = 0;
2029    const char *opts = "";
2030    char *result = 0;
2031    size_t len = strlen(name);
2032
2033    switch (which) {
2034    case dcBzip:
2035	verb = GET_PROGRAM(BZCAT_PATH);
2036	if (*verb == '\0') {
2037	    verb = GET_PROGRAM(BZIP2_PATH);
2038	    opts = "-dc";
2039	}
2040	break;
2041    case dcCompress:
2042	verb = GET_PROGRAM(ZCAT_PATH);
2043	if (*verb == '\0') {
2044	    verb = GET_PROGRAM(UNCOMPRESS_PATH);
2045	    opts = "-c";
2046	    if (*verb == '\0') {
2047		/* not all compress's recognize the options, test this last */
2048		verb = GET_PROGRAM(COMPRESS_PATH);
2049		opts = "-dc";
2050	    }
2051	}
2052	break;
2053    case dcGzip:
2054	verb = GET_PROGRAM(GZIP_PATH);
2055	opts = "-dc";
2056	break;
2057    case dcLzma:
2058	verb = GET_PROGRAM(LZCAT_PATH);
2059	opts = "-dc";
2060	break;
2061    case dcPack:
2062	verb = GET_PROGRAM(PCAT_PATH);
2063	break;
2064    case dcXz:
2065	verb = GET_PROGRAM(XZ_PATH);
2066	opts = "-dc";
2067	break;
2068    case dcEmpty:
2069	/* FALLTHRU */
2070    case dcNone:
2071	break;
2072    }
2073    if (verb != 0 && *verb != '\0') {
2074	result = (char *) xmalloc(strlen(verb) + 10 + len);
2075	sprintf(result, "%s %s", verb, opts);
2076	if (*name != '\0') {
2077	    sprintf(result + strlen(result), " \"%s\"", name);
2078	}
2079    }
2080    return result;
2081}
2082
2083static char *
2084is_compressed(const char *name)
2085{
2086    size_t len = strlen(name);
2087    Decompress which;
2088
2089    if (len > 2 && !strcmp(name + len - 2, ".Z")) {
2090	which = dcCompress;
2091    } else if (len > 2 && !strcmp(name + len - 2, ".z")) {
2092	which = dcPack;
2093    } else if (len > 3 && !strcmp(name + len - 3, ".gz")) {
2094	which = dcGzip;
2095    } else if (len > 4 && !strcmp(name + len - 4, ".bz2")) {
2096	which = dcBzip;
2097    } else if (len > 5 && !strcmp(name + len - 5, ".lzma")) {
2098	which = dcLzma;
2099    } else if (len > 3 && !strcmp(name + len - 3, ".xz")) {
2100	which = dcXz;
2101    } else {
2102	which = dcNone;
2103    }
2104    return decompressor(which, name);
2105}
2106
#ifdef HAVE_MKDTEMP
#define MY_MKDTEMP(path) mkdtemp(path)
#else
/*
 * mktemp is supposedly marked obsolete at the same point that mkdtemp is
 * introduced.
 *
 * Substitute for mkdtemp(): fill in the template 'path' using mktemp() and
 * create a directory by that name, accessible only by its owner.  Returns
 * 'path' on success, or 0 if no name could be generated or the directory
 * could not be created.
 */
static char *
my_mkdtemp(char *path)
{
    char *result = mktemp(path);

    /* on failure mktemp may hand back the template as an empty string */
    if (result != 0 && *result == '\0') {
	result = 0;
    }
    if (result != 0) {
	if (MKDIR(result, 0700) < 0) {
	    result = 0;
	}
    }
    /* return the checked result, so that callers can see a failure */
    return result;
}
#define MY_MKDTEMP(path) my_mkdtemp(path)
#endif
2127
/*
 * Copy standard input to a file named "stdin" in a newly created temporary
 * directory, under $TMPDIR if that is set and under "/tmp/" otherwise.
 *
 * On success, returns the allocated pathname of the copy, with '*dirpath' set
 * to the allocated name of the directory.  On failure, returns 0 with
 * '*dirpath' set to 0.
 */
static char *
copy_stdin(char **dirpath)
{
    const char *tmp = getenv("TMPDIR");
    char *result;
    FILE *fp;
    int ch;

    if (tmp == 0)
	tmp = "/tmp/";

    /* 12 = length of "/diffXXXXXX" plus the trailing null */
    *dirpath = xmalloc(strlen(tmp) + 12);
    strcpy(*dirpath, tmp);
    strcat(*dirpath, "/diffXXXXXX");

    if (MY_MKDTEMP(*dirpath) == 0) {
	free(*dirpath);
	*dirpath = 0;
	return 0;
    }

    result = xmalloc(strlen(*dirpath) + 10);
    sprintf(result, "%s/stdin", *dirpath);

    fp = fopen(result, "w");
    if (fp == 0) {
	free(result);
	rmdir(*dirpath);	/* Assume that the /stdin file was not created */
	free(*dirpath);
	*dirpath = 0;
	return 0;
    }

    while ((ch = MY_GETC(stdin)) != EOF) {
	fputc(ch, fp);
    }
    fclose(fp);

    return result;
}
2164#endif
2165
2166static void
2167set_path_opt(char *value, int destination)
2168{
2169    path_opt = value;
2170    path_dest = destination;
2171    if (*path_opt != 0) {
2172	if (is_dir(path_opt)) {
2173	    num_marks = 4;
2174	} else {
2175	    fprintf(stderr, "Not a directory:%s\n", path_opt);
2176	    exit(EXIT_FAILURE);
2177	}
2178    }
2179}
2180
2181static void
2182usage(FILE *fp)
2183{
2184    static const char *msg[] =
2185    {
2186	"Usage: diffstat [options] [files]",
2187	"",
2188	"Reads from one or more input files which contain output from 'diff',",
2189	"producing a histogram of total lines changed for each file referenced.",
2190	"If no filename is given on the command line, reads from standard input.",
2191	"",
2192	"Options:",
2193	"  -c      prefix each line with comment (#)",
2194#if OPT_TRACE
2195	"  -d      debug - prints a lot of information",
2196#endif
2197	"  -D PATH specify location of patched files, use for unchanged-count",
2198	"  -e FILE redirect standard error to FILE",
2199	"  -f NUM  format (0=concise, 1=normal, 2=filled, 4=values)",
2200	"  -h      print this message",
2201	"  -k      do not merge filenames",
2202	"  -l      list filenames only",
2203	"  -m      merge insert/delete data in chunks as modified-lines",
2204	"  -n NUM  specify minimum width for the filenames (default: auto)",
2205	"  -N NUM  specify maximum width for the filenames (default: auto)",
2206	"  -o FILE redirect standard output to FILE",
2207	"  -p NUM  specify number of pathname-separators to strip (default: common)",
2208	"  -q      suppress the \"0 files changed\" message for empty diffs",
2209	"  -r NUM  specify rounding for histogram (0=none, 1=simple, 2=adjusted)",
2210	"  -R      assume patch was created with old and new files swapped",
2211	"  -S PATH specify location of original files, use for unchanged-count",
2212	"  -t      print a table (comma-separated-values) rather than histogram",
2213	"  -u      do not sort the input list",
2214	"  -v      show progress if output is redirected to a file",
2215	"  -V      prints the version number",
2216	"  -w NUM  specify maximum width of the output (default: 80)",
2217    };
2218    unsigned j;
2219    for (j = 0; j < sizeof(msg) / sizeof(msg[0]); j++)
2220	fprintf(fp, "%s\n", msg[j]);
2221}
2222
/* Wrapper around getopt that also parses "--help" and "--version".
 * argc, argv, opts, return value, and globals optarg, optind,
 * opterr, and optopt are as in getopt().  help and version designate
 * what should be returned if --help or --version are encountered;
 * in either case optind is advanced past that argument. */
static int
getopt_helper(int argc, char *const argv[], const char *opts,
	      int help, int version)
{
    const char *arg = (optind < argc) ? argv[optind] : NULL;

    if (arg != NULL) {
	if (!strcmp(arg, "--help")) {
	    optind++;
	    return help;
	}
	if (!strcmp(arg, "--version")) {
	    optind++;
	    return version;
	}
    }
    return getopt(argc, argv, opts);
}
2242
2243int
2244main(int argc, char *argv[])
2245{
2246    int j;
2247    char version[80];
2248
2249    max_width = 80;
2250
2251    while ((j = getopt_helper(argc, argv,
2252			      "bcCdD:e:f:hklmn:N:o:p:qr:RsS:tuvVw:", 'h', 'V'))
2253	   != -1) {
2254	switch (j) {
2255	case 'b':
2256	    suppress_binary = 1;
2257	    break;
2258	case 'c':
2259	    comment_opt = "#";
2260	    break;
2261	case 'C':
2262	    show_colors = 1;
2263	    break;
2264#if OPT_TRACE
2265	case 'd':
2266	    trace_opt = 1;
2267	    break;
2268#endif
2269	case 'D':
2270	    set_path_opt(optarg, 1);
2271	    break;
2272	case 'e':
2273	    if (freopen(optarg, "w", stderr) == 0)
2274		failed(optarg);
2275	    break;
2276	case 'f':
2277	    format_opt = atoi(optarg);
2278	    break;
2279	case 'h':
2280	    usage(stdout);
2281	    return (EXIT_SUCCESS);
2282	case 'k':
2283	    merge_names = 0;
2284	    break;
2285	case 'l':
2286	    names_only = 1;
2287	    break;
2288	case 'm':
2289	    merge_opt = 1;
2290	    break;
2291	case 'n':
2292	    min_name_wide = atoi(optarg);
2293	    break;
2294	case 'N':
2295	    max_name_wide = atoi(optarg);
2296	    break;
2297	case 'o':
2298	    if (freopen(optarg, "w", stdout) == 0)
2299		failed(optarg);
2300	    break;
2301	case 'p':
2302	    prefix_opt = atoi(optarg);
2303	    break;
2304	case 'r':
2305	    round_opt = atoi(optarg);
2306	    break;
2307	case 'R':
2308	    reverse_opt = 1;
2309	    break;
2310	case 's':
2311	    summary_only = 1;
2312	    break;
2313	case 'S':
2314	    set_path_opt(optarg, 0);
2315	    break;
2316	case 't':
2317	    table_opt = 1;
2318	    break;
2319	case 'u':
2320	    sort_names = 0;
2321	    break;
2322	case 'v':
2323	    verbose = 1;
2324	    break;
2325	case 'V':
2326#ifndef	NO_IDENT
2327	    if (!sscanf(Id, "%*s %*s %s", version))
2328#endif
2329		(void) strcpy(version, "?");
2330	    printf("diffstat version %s\n", version);
2331	    return (EXIT_SUCCESS);
2332	case 'w':
2333	    max_width = atoi(optarg);
2334	    break;
2335	case 'q':
2336	    quiet = 1;
2337	    break;
2338	default:
2339	    usage(stderr);
2340	    return (EXIT_FAILURE);
2341	}
2342    }
2343
2344    /*
2345     * The numbers from -S/-D options will only be useful if the merge option
2346     * is added.
2347     */
2348    if (path_opt)
2349	merge_opt = 1;
2350
2351    show_progress = verbose && (!isatty(fileno(stdout))
2352				&& isatty(fileno(stderr)));
2353
2354#ifdef HAVE_TSEARCH
2355    use_tsearch = (sort_names && merge_names);
2356#endif
2357
2358    if (optind < argc) {
2359	while (optind < argc) {
2360	    FILE *fp;
2361	    char *name = argv[optind++];
2362#ifdef HAVE_POPEN
2363	    char *command = is_compressed(name);
2364	    if (command != 0) {
2365		if ((fp = popen(command, "r")) != 0) {
2366		    if (show_progress) {
2367			(void) fprintf(stderr, "%s\n", name);
2368			(void) fflush(stderr);
2369		    }
2370		    do_file(fp, name);
2371		    (void) pclose(fp);
2372		}
2373		free(command);
2374	    } else
2375#endif
2376	    if ((fp = fopen(name, "rb")) != 0) {
2377		if (show_progress) {
2378		    (void) fprintf(stderr, "%s\n", name);
2379		    (void) fflush(stderr);
2380		}
2381		do_file(fp, name);
2382		(void) fclose(fp);
2383	    } else {
2384		failed(name);
2385	    }
2386	}
2387    } else {
2388#ifdef HAVE_POPEN
2389	FILE *fp;
2390	Decompress which = dcEmpty;
2391	char *stdin_dir = 0;
2392	char *myfile;
2393	char sniff[8];
2394	int ch;
2395	unsigned got = 0;
2396	char *command;
2397
2398	if ((ch = MY_GETC(stdin)) != EOF) {
2399	    which = dcNone;
2400	    if (ch == 'B') {	/* perhaps bzip2 (poor magic design...) */
2401		sniff[got++] = (char) ch;
2402		while (got < 5) {
2403		    if ((ch = MY_GETC(stdin)) == EOF)
2404			break;
2405		    sniff[got++] = (char) ch;
2406		}
2407		if (got == 5
2408		    && !strncmp(sniff, "BZh", (size_t) 3)
2409		    && isdigit((unsigned char) sniff[3])
2410		    && isdigit((unsigned char) sniff[4])) {
2411		    which = dcBzip;
2412		}
2413	    } else if (ch == ']') {	/* perhaps lzma */
2414		sniff[got++] = (char) ch;
2415		while (got < 4) {
2416		    if ((ch = MY_GETC(stdin)) == EOF)
2417			break;
2418		    sniff[got++] = (char) ch;
2419		}
2420		if (got == 4
2421		    && !memcmp(sniff, "]\0\0\200", (size_t) 4)) {
2422		    which = dcLzma;
2423		}
2424	    } else if (ch == 0xfd) {	/* perhaps xz */
2425		sniff[got++] = (char) ch;
2426		while (got < 6) {
2427		    if ((ch = MY_GETC(stdin)) == EOF)
2428			break;
2429		    sniff[got++] = (char) ch;
2430		}
2431		if (got == 6
2432		    && !memcmp(sniff, "\3757zXZ\0", (size_t) 6)) {
2433		    which = dcXz;
2434		}
2435	    } else if (ch == '\037') {	/* perhaps compress, etc. */
2436		sniff[got++] = (char) ch;
2437		if ((ch = MY_GETC(stdin)) != EOF) {
2438		    sniff[got++] = (char) ch;
2439		    switch (ch) {
2440		    case 0213:
2441			which = dcGzip;
2442			break;
2443		    case 0235:
2444			which = dcCompress;
2445			break;
2446		    case 0036:
2447			which = dcPack;
2448			break;
2449		    }
2450		}
2451	    } else {
2452		sniff[got++] = (char) ch;
2453	    }
2454	}
2455	/*
2456	 * The C standard only guarantees one ungetc;
2457	 * virtually everyone allows more.
2458	 */
2459	while (got != 0) {
2460	    ungetc(sniff[--got], stdin);
2461	}
2462	if (which != dcNone
2463	    && which != dcEmpty
2464	    && (myfile = copy_stdin(&stdin_dir)) != 0) {
2465
2466	    /* open pipe to decompress temporary file */
2467	    command = decompressor(which, myfile);
2468	    if ((fp = popen(command, "r")) != 0) {
2469		do_file(fp, "stdin");
2470		(void) pclose(fp);
2471	    }
2472	    free(command);
2473
2474	    unlink(myfile);
2475	    free(myfile);
2476	    myfile = 0;
2477	    rmdir(stdin_dir);
2478	    free(stdin_dir);
2479	    stdin_dir = 0;
2480	} else if (which != dcEmpty)
2481#endif
2482	    do_file(stdin, "stdin");
2483    }
2484    summarize();
2485#if defined(NO_LEAKS)
2486    while (all_data != 0) {
2487	delink(all_data);
2488    }
2489#endif
2490    return (EXIT_SUCCESS);
2491}
2492