1/****************************************************************************** 2 * Copyright 1994-2010,2012 by Thomas E. Dickey * 3 * All Rights Reserved. * 4 * * 5 * Permission to use, copy, modify, and distribute this software and its * 6 * documentation for any purpose and without fee is hereby granted, provided * 7 * that the above copyright notice appear in all copies and that both that * 8 * copyright notice and this permission notice appear in supporting * 9 * documentation, and that the name of the above listed copyright holder(s) * 10 * not be used in advertising or publicity pertaining to distribution of the * 11 * software without specific, written prior permission. * 12 * * 13 * THE ABOVE LISTED COPYRIGHT HOLDER(S) DISCLAIM ALL WARRANTIES WITH REGARD * 14 * TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND * 15 * FITNESS, IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE * 16 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * 17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * 18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR * 19 * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * 20 ******************************************************************************/ 21 22#ifndef NO_IDENT 23static const char *Id = "$Id: diffstat.c,v 1.55 2012/01/03 09:44:24 tom Exp $"; 24#endif 25 26/* 27 * Title: diffstat.c 28 * Author: T.E.Dickey 29 * Created: 02 Feb 1992 30 * Modified: 31 * 03 Jan 2012, Correct case for "xz" suffix in is_compressed() 32 * (patch from Frederic Culot in FreeBSD ports). Add 33 * "-R" option. Improve dequoting of filenames in 34 * headers. 35 * 10 Oct 2010, correct display of new files when -S/-D options 36 * are used. Remove the temporary directory on 37 * error, introduced in 1.48+ (patch by Solar 38 * Designer). 39 * 19 Jul 2010, add missing "break" statement which left "-c" 40 * option falling-through into "-C". 
41 * 16 Jul 2010, configure "xz" path explicitly, in case lzcat 42 * does not support xz format. Add "-s" (summary) 43 * and "-C" (color) options. 44 * 15 Jul 2010, fix strict gcc warnings, e.g., using const. 45 * 10 Jan 2010, improve a case where filenames have embedded blanks 46 * (patch by Reinier Post). 47 * 07 Nov 2009, correct suffix-check for ".xz" files as 48 * command-line parameters rather than as piped 49 * input (report by Moritz Barsnick). 50 * 06 Oct 2009, fixes to build/run with MSYS or MinGW. use 51 * $TMPDIR for path of temporary file used in 52 * decompression. correct else-condition for 53 * detecting compression type (patch by Zach Hirsch). 54 * 31 Aug 2009, improve lzma support, add support for xz (patch by 55 * Eric Blake). Add special case for no-newline 56 * message from some diff's (Ubuntu #269895). 57 * Improve configure check for getopt(). 58 * 11 Aug 2009, Add logic to check standard input, decompress if 59 * possible. Add -N option, to truncate long names. 60 * Add pack/pcat as a compression type. 61 * Add lzma/lzcat as a compression type. 62 * Allow overriding program paths with environment. 63 * 10 Aug 2009, modify to work with Perforce-style diffs (patch 64 * by Ed Schouten). 65 * 29 Mar 2009, modify to work with patch ".rej" files, which have 66 * no filename header (use the name of the ".rej" 67 * file if it is available). 68 * 29 Sep 2008, fix typo in usage message. 69 * 06 Aug 2008, add "-m", "-S" and "-D" options. 70 * 05 Aug 2008, add "-q" option to suppress 0-files-changed 71 * message (patch by Greg Norris). 72 * 04 Sep 2007, add "-b" option to suppress binary-files (patch 73 * by Greg Norris). 74 * 26 Aug 2007, add "-d" option to show debugging traces, rather 75 * than by defining DEBUG. Add check after 76 * unified-diff chunk to avoid adding non-diff text 77 * (report by Adrian Bunk). Quote pathname passed 78 * in command to gzip/uncompress. 
Add a check for 79 * default-diff output without the "diff" command 80 * supplied to provide filename, mark as "unknown". 81 * 16 Jul 2006, fix to avoid modifying data which is being used by 82 * tsearch() for ordering the binary tree (report by 83 * Adrian Bunk). 84 * 02 Jul 2006, do not ignore pathnames in /tmp/, since some tools 85 * create usable pathnames for both old/new files 86 * there (Debian #376086). Correct ifdef for 87 * fgetc_unlocked(). Add configure check for 88 * compress, gzip and bzip2 programs that may be used 89 * to decompress files. 90 * 24 Aug 2005, update usage message for -l, -r changes. 91 * 15 Aug 2005, apply PLURAL() to num_files (Jean Delvare). 92 * add -l option (request by Michael Burian). 93 * Use fgetc_locked() if available. 94 * 14 Aug 2005, add -r2 option (rounding with adjustment to ensure 95 * that nonzero values always display a histogram 96 * bar), adapted from patch by Jean Delvare. Extend 97 * the -f option (2=filled, 4=verbose). 98 * 12 Aug 2005, modify to use tsearch() for sorted lists. 99 * 11 Aug 2005, minor fixes to scaling of modified lines. Add 100 * -r (round) option. 101 * 05 Aug 2005, add -t (table) option. 102 * 10 Apr 2005, change order of merging and prefix-stripping so 103 * stripping all prefixes, e.g., with -p9, will be 104 * sorted as expected (Patch by Jean Delvare 105 * <khali@linux-fr.org>). 106 * 10 Jan 2005, add support for '--help' and '--version' (Patch 107 * by Eric Blake <ebb9@byu.net>.) 108 * 16 Dec 2004, fix a different case for data beginning with "--" 109 * which was treated as a header line. 110 * 14 Dec 2004, Fix allocation problems. Open files in binary 111 * mode for reading. Getopt returns -1, not 112 * necessarily EOF. Add const where useful. Use 113 * NO_IDENT where necessary. malloc() comes from 114 * <stdlib.h> in standard systems (Patch by Eric 115 * Blake <ebb9@byu.net>.) 
116 * 08 Nov 2004, minor fix for resync of unified diffs checks for 117 * range (line beginning with '@' without header 118 * lines (successive lines beginning with "---" and 119 * "+++"). Fix a few problems reported by valgrind. 120 * 09 Nov 2003, modify check for lines beginning with '-' or '+' 121 * to treat only "---" in old-style diffs as a 122 * special case. 123 * 14 Feb 2003, modify check for filenames to allow for some cases 124 * of incomplete dates (the reported example omitted 125 * the day of the month). Correct a typo in usage(). 126 * Add -e, -h, -o options. 127 * 04 Jan 2003, improve tracking of chunks in unified diff, in 128 * case the original files contained a '+' or '-' in 129 * the first column (Debian #155000). Add -v option 130 * (Debian #170947). Modify to allocate buffers big 131 * enough for long input lines. Do additional 132 * merging to handle unusual Index/diff constructs in 133 * recent makepatch script. 134 * 20 Aug 2002, add -u option to tell diffstat to preserve the 135 * order of filenames as given rather than sort them 136 * (request by H Peter Anvin <hpa@zytor.com>). Add 137 * -k option for completeness. 138 * 09 Aug 2002, allow either '/' or '-' as delimiters in dates, 139 * to accommodate diffutils 2.8 (report by Rik van 140 * Riel <riel@conectiva.com.br>). 141 * 10 Oct 2001, add bzip2 (.bz2) suffix as suggested by 142 * Gregory T Norris <haphazard@socket.net> in Debian 143 * bug report #82969). 144 * add check for diff from RCS archive where the 145 * "diff" lines do not reference a filename. 146 * 29 Mar 2000, add -c option. Check for compressed input, read 147 * via pipe. Change to ANSI C. Adapted change from 148 * Troy Engel to add option that displays a number 149 * only, rather than a histogram. 150 * 17 May 1998, handle Debian diff files, which do not contain 151 * dates on the header lines. 152 * 16 Jan 1998, accommodate patches w/o tabs in header lines (e.g., 153 * from cut/paste). Strip suffixes such as ".orig". 
154 * 24 Mar 1996, corrected -p0 logic, more fixes in do_merging. 155 * 16 Mar 1996, corrected state-change for "Binary". Added -p 156 * option. 157 * 17 Dec 1995, corrected matching algorithm in 'do_merging()' 158 * 11 Dec 1995, mods to accommodate diffs against /dev/null or 159 * /tmp/XXX (tempfiles). 160 * 06 May 1995, limit scaling -- only shrink-to-fit. 161 * 29 Apr 1995, recognize 'rcsdiff -u' format. 162 * 26 Dec 1994, strip common pathname-prefix. 163 * 13 Nov 1994, added '-n' option. Corrected logic of 'match'. 164 * 17 Jun 1994, ifdef-<string.h> 165 * 12 Jun 1994, recognize unified diff, and output of makepatch. 166 * 04 Oct 1993, merge multiple diff-files, busy message when the 167 * output is piped to a file. 168 * 169 * Function: this program reads the output of 'diff' and displays a histogram 170 * of the insertions/deletions/modifications per-file. 171 */ 172 173#if defined(HAVE_CONFIG_H) 174#include <config.h> 175#endif 176 177#if defined(WIN32) && !defined(HAVE_CONFIG_H) 178#define HAVE_STDLIB_H 179#define HAVE_STRING_H 180#define HAVE_MALLOC_H 181#define HAVE_GETOPT_H 182#endif 183 184#include <stdio.h> 185#include <ctype.h> 186 187#ifdef HAVE_STRING_H 188#include <string.h> 189#else 190#include <strings.h> 191#define strchr index 192#define strrchr rindex 193#endif 194 195#ifdef HAVE_STDLIB_H 196#include <stdlib.h> 197#else 198extern int atoi(const char *); 199#endif 200 201#ifdef HAVE_UNISTD_H 202#include <unistd.h> 203#else 204extern int isatty(int); 205#endif 206 207#ifdef HAVE_MALLOC_H 208#include <malloc.h> 209#endif 210 211#if defined(HAVE_SEARCH_H) && defined(HAVE_TSEARCH) 212#include <search.h> 213#else 214#undef HAVE_TSEARCH 215#endif 216 217#ifdef HAVE_GETC_UNLOCKED 218#define MY_GETC getc_unlocked 219#else 220#define MY_GETC getc 221#endif 222 223#ifdef HAVE_GETOPT_H 224#include <getopt.h> 225#elif !defined(HAVE_GETOPT_HEADER) 226extern int getopt(int, char *const *, const char *); 227extern char *optarg; 228extern int optind; 
229#endif 230 231#include <sys/types.h> 232#include <sys/stat.h> 233 234#if !defined(EXIT_SUCCESS) 235#define EXIT_SUCCESS 0 236#define EXIT_FAILURE 1 237#endif 238 239#ifndef BZCAT_PATH 240#define BZCAT_PATH "" 241#endif 242 243#ifndef BZIP2_PATH 244#define BZIP2_PATH "" 245#endif 246 247#ifndef COMPRESS_PATH 248#define COMPRESS_PATH "" 249#endif 250 251#ifndef GZIP_PATH 252#define GZIP_PATH "" 253#endif 254 255#ifndef LZCAT_PATH 256#define LZCAT_PATH "" 257#endif 258 259#ifndef PCAT_PATH 260#define PCAT_PATH "" 261#endif 262 263#ifndef UNCOMPRESS_PATH 264#define UNCOMPRESS_PATH "" 265#endif 266 267#ifndef XZ_PATH 268#define XZ_PATH "" 269#endif 270 271#ifndef ZCAT_PATH 272#define ZCAT_PATH "" 273#endif 274 275/******************************************************************************/ 276 277#if defined(__MINGW32__) || defined(WIN32) 278#define MKDIR(name,mode) mkdir(name) 279#else 280#define MKDIR(name,mode) mkdir(name,mode) 281#endif 282 283#if defined(WIN32) && !defined(__MINGW32__) 284#define PATHSEP '\\' 285#else 286#define PATHSEP '/' 287#endif 288 289#define SQUOTE '\'' 290#define EOS '\0' 291#define BLANK ' ' 292 293#define UC(c) ((unsigned char)(c)) 294 295#ifndef OPT_TRACE 296#define OPT_TRACE 1 297#endif 298 299#if OPT_TRACE 300#define TRACE(p) if (trace_opt) printf p 301#else 302#define TRACE(p) /*nothing */ 303#endif 304 305#define contain_any(s,reject) (strcspn(s,reject) != strlen(s)) 306 307#define HAVE_NOTHING 0 308#define HAVE_GENERIC 1 /* e.g., "Index: foo" w/o pathname */ 309#define HAVE_PATH 2 /* reference-file from "diff dirname/foo" */ 310#define HAVE_PATH2 4 /* comparison-file from "diff dirname/foo" */ 311 312#define FMT_CONCISE 0 313#define FMT_NORMAL 1 314#define FMT_FILLED 2 315#define FMT_VERBOSE 4 316 317typedef enum comment { 318 Normal, Only, Binary 319} Comment; 320 321#define MARKS 4 /* each of +, - and ! 
*/ 322 323typedef enum { 324 cInsert = 0, 325 cDelete, 326 cModify, 327 cEquals 328} Change; 329 330#define InsOf(p) (p)->count[cInsert] /* "+" count inserted lines */ 331#define DelOf(p) (p)->count[cDelete] /* "-" count deleted lines */ 332#define ModOf(p) (p)->count[cModify] /* "!" count modified lines */ 333#define EqlOf(p) (p)->count[cEquals] /* "=" count unmodified lines */ 334 335#define TotalOf(p) (InsOf(p) + DelOf(p) + ModOf(p) + EqlOf(p)) 336#define for_each_mark(n) for (n = 0; n < num_marks; ++n) 337 338typedef struct _data { 339 struct _data *link; 340 char *name; /* the filename */ 341 int copy; /* true if filename is const-literal */ 342 int base; /* beginning of name if -p option used */ 343 Comment cmt; 344 int pending; 345 long chunks; /* total number of chunks */ 346 long chunk[MARKS]; /* counts for the current chunk */ 347 long count[MARKS]; /* counts for the file */ 348} DATA; 349 350typedef enum { 351 dcNone = 0, 352 dcBzip, 353 dcCompress, 354 dcGzip, 355 dcLzma, 356 dcPack, 357 dcXz, 358 dcEmpty 359} Decompress; 360 361static const char marks[MARKS + 1] = "+-!="; 362static const int colors[MARKS + 1] = 363{2, 1, 6, 4}; 364 365static DATA *all_data; 366static const char *comment_opt = ""; 367static char *path_opt = 0; 368static int format_opt = FMT_NORMAL; 369static int max_width; /* the specified width-limit */ 370static int merge_names = 1; /* true if we merge similar filenames */ 371static int merge_opt = 0; /* true if we merge ins/del as modified */ 372static int min_name_wide; /* minimum amount reserved for filenames */ 373static int max_name_wide; /* maximum amount reserved for filenames */ 374static int names_only; /* true if we list filenames only */ 375static int num_marks = 3; /* 3 or 4, according to "-P" option */ 376static int reverse_opt; /* true if results are reversed */ 377static int show_colors; /* true if showing SGR colors */ 378static int show_progress; /* if not writing to tty, show progress */ 379static int summary_only = 
0; /* true if only summary line is shown */ 380static int path_dest; /* true if path_opt is destination (patched) */ 381static int plot_width; /* the amount left over for histogram */ 382static int prefix_opt = -1; /* if positive, controls stripping of PATHSEP */ 383static int round_opt = 0; /* if nonzero, round data for histogram */ 384static int table_opt = 0; /* if nonzero, write table rather than plot */ 385static int trace_opt = 0; /* if nonzero, write debugging information */ 386static int sort_names = 1; /* true if we sort filenames */ 387static int verbose = 0; /* -v option */ 388static int quiet = 0; /* -q option */ 389static int suppress_binary = 0; /* -b option */ 390static long plot_scale; /* the effective scale (1:maximum) */ 391 392#ifdef HAVE_TSEARCH 393static int use_tsearch; 394static void *sorted_data; 395#endif 396 397static int prefix_len = -1; 398 399/******************************************************************************/ 400 401static void 402failed(const char *s) 403{ 404 perror(s); 405 exit(EXIT_FAILURE); 406} 407 408/* malloc wrapper that never returns NULL */ 409static void * 410xmalloc(size_t s) 411{ 412 void *p; 413 if ((p = malloc(s)) == NULL) 414 failed("malloc"); 415 return p; 416} 417 418static int 419is_dir(const char *name) 420{ 421 struct stat sb; 422 return (stat(name, &sb) == 0 && 423 (sb.st_mode & S_IFMT) == S_IFDIR); 424} 425 426static void 427blip(int c) 428{ 429 if (show_progress) { 430 (void) fputc(c, stderr); 431 (void) fflush(stderr); 432 } 433} 434 435static char * 436new_string(const char *s) 437{ 438 return strcpy((char *) xmalloc((size_t) (strlen(s) + 1)), s); 439} 440 441static int 442compare_data(const void *a, const void *b) 443{ 444 const DATA *p = (const DATA *) a; 445 const DATA *q = (const DATA *) b; 446 return strcmp(p->name + p->base, q->name + q->base); 447} 448 449static void 450init_data(DATA * data, const char *name, int copy, int base) 451{ 452 memset(data, 0, sizeof(*data)); 453 data->name = 
(char *) name; 454 data->copy = copy; 455 data->base = base; 456 data->cmt = Normal; 457} 458 459static DATA * 460new_data(const char *name, int base) 461{ 462 DATA *r = (DATA *) xmalloc(sizeof(DATA)); 463 464 init_data(r, new_string(name), 0, base); 465 466 return r; 467} 468 469#ifdef HAVE_TSEARCH 470static DATA * 471add_tsearch_data(const char *name, int base) 472{ 473 DATA find; 474 DATA *result; 475 void *pp; 476 477 init_data(&find, name, 1, base); 478 if ((pp = tfind(&find, &sorted_data, compare_data)) != 0) { 479 result = *(DATA **) pp; 480 return result; 481 } 482 result = new_data(name, base); 483 (void) tsearch(result, &sorted_data, compare_data); 484 result->link = all_data; 485 all_data = result; 486 487 return result; 488} 489#endif 490 491static DATA * 492find_data(const char *name) 493{ 494 DATA *p, *q, *r; 495 DATA find; 496 int base = 0; 497 498 TRACE(("** find_data(%s)\n", name)); 499 500 /* Compute the base offset if the prefix option is used */ 501 if (prefix_opt >= 0) { 502 int n; 503 504 for (n = prefix_opt; n > 0; n--) { 505 char *s = strchr(name + base, PATHSEP); 506 if (s == 0 || *++s == EOS) 507 break; 508 base = (int) (s - name); 509 } 510 TRACE(("** base set to %d\n", base)); 511 } 512 513 /* Insert into sorted list (usually sorted). If we are not sorting or 514 * merging names, we fall off the end and link the new entry to the end of 515 * the list. If the prefix option is used, the prefix is ignored by the 516 * merge and sort operations. 517 * 518 * If we have tsearch(), we will maintain the sorted list using it and 519 * tfind(). 
520 */ 521#ifdef HAVE_TSEARCH 522 if (use_tsearch) { 523 r = add_tsearch_data(name, base); 524 } else 525#endif 526 { 527 init_data(&find, name, 1, base); 528 for (p = all_data, q = 0; p != 0; q = p, p = p->link) { 529 int cmp = compare_data(p, &find); 530 if (merge_names && (cmp == 0)) 531 return p; 532 if (sort_names && (cmp > 0)) 533 break; 534 } 535 r = new_data(name, base); 536 if (q != 0) 537 q->link = r; 538 else 539 all_data = r; 540 541 r->link = p; 542 } 543 544 return r; 545} 546 547/* 548 * Remove a unneeded data item from the linked list. Free the name as well. 549 */ 550static int 551delink(DATA * data) 552{ 553 DATA *p, *q; 554 555 TRACE(("** delink '%s'\n", data->name)); 556 557#ifdef HAVE_TSEARCH 558 if (use_tsearch) { 559 if (tdelete(data, &sorted_data, compare_data) == 0) 560 return 0; 561 } 562#endif 563 for (p = all_data, q = 0; p != 0; q = p, p = p->link) { 564 if (p == data) { 565 if (q != 0) 566 q->link = p->link; 567 else 568 all_data = p->link; 569 if (!p->copy) 570 free(p->name); 571 free(p); 572 return 1; 573 } 574 } 575 return 0; 576} 577 578/* 579 * Compare string 's' against a constant, returning either a pointer just 580 * past the matched part of 's' if it matches exactly, or null if a mismatch 581 * was found. 582 */ 583static char * 584match(char *s, const char *p) 585{ 586 int ok = 0; 587 588 while (*s != EOS) { 589 if (*p == EOS) { 590 ok = 1; 591 break; 592 } 593 if (*s++ != *p++) 594 break; 595 if (*s == EOS && *p == EOS) { 596 ok = 1; 597 break; 598 } 599 } 600 return ok ? s : 0; 601} 602 603static int 604version_num(const char *s) 605{ 606 int main_ver, sub_ver; 607 char temp[2]; 608 return (sscanf(s, "%d.%d%c", &main_ver, &sub_ver, temp) == 2); 609} 610 611/* 612 * Check for a range of line-numbers, used in editing scripts. 
613 */ 614static int 615edit_range(const char *s) 616{ 617 int first, last; 618 char temp[2]; 619 return (sscanf(s, "%d,%d%c", &first, &last, temp) == 2) 620 || (sscanf(s, "%d%c", &first, temp) == 1); 621} 622 623/* 624 * Decode a range for default diff. 625 */ 626static int 627decode_default(char *s, 628 long *first, long *first_size, 629 long *second, long *second_size) 630{ 631 int rc = 0; 632 char *next; 633 634 if (isdigit(UC(*s))) { 635 *first_size = 1; 636 *second_size = 1; 637 638 *first = strtol(s, &next, 10); 639 if (next != 0 && next != s) { 640 if (*next == ',') { 641 s = ++next; 642 *first_size = strtol(s, &next, 10) + 1 - *first; 643 } 644 } 645 if (next != 0 && next != s) { 646 switch (*next++) { 647 case 'a': 648 case 'c': 649 case 'd': 650 s = next; 651 *second = strtol(s, &next, 10); 652 if (next != 0 && next != s) { 653 if (*next == ',') { 654 s = ++next; 655 *second_size = strtol(s, &next, 10) + 1 - *second; 656 } 657 } 658 if (next != 0 && next != s && *next == EOS) 659 rc = 1; 660 break; 661 } 662 } 663 } 664 return rc; 665} 666 667/* 668 * Decode a range for unified diff. Oddly, the comments in diffutils code 669 * claim that both numbers are line-numbers. However, inspection of the output 670 * shows that the numbers are a line-number followed by a count. 
671 */ 672static int 673decode_range(const char *s, int *first, int *second) 674{ 675 int rc = 0; 676 char check; 677 678 if (isdigit(UC(*s))) { 679 if (sscanf(s, "%d,%d%c", first, second, &check) == 2) { 680 TRACE(("** decode_range #1 first=%d, second=%d\n", *first, *second)); 681 rc = 1; 682 } else if (sscanf(s, "%d%c", first, &check) == 1) { 683 *second = *first; /* diffutils 2.7 does this */ 684 TRACE(("** decode_range #2 first=%d, second=%d\n", *first, *second)); 685 rc = 1; 686 } 687 } 688 return rc; 689} 690 691static int 692HadDiffs(const DATA * data) 693{ 694 return InsOf(data) != 0 695 || DelOf(data) != 0 696 || ModOf(data) != 0 697 || data->cmt != Normal; 698} 699 700/* 701 * If the given path is not one of the "ignore" paths, then return true. 702 */ 703static int 704can_be_merged(const char *path) 705{ 706 int result = 0; 707 if (strcmp(path, "") 708 && strcmp(path, "/dev/null")) 709 result = 1; 710 return result; 711} 712 713static int 714is_leaf(const char *theLeaf, const char *path) 715{ 716 char *s; 717 718 if (strchr(theLeaf, PATHSEP) == 0 719 && (s = strrchr(path, PATHSEP)) != 0 720 && !strcmp(++s, theLeaf)) 721 return 1; 722 return 0; 723} 724 725static char * 726trim_datapath(DATA ** datap, size_t length, int *localp) 727{ 728 char *target = (*datap)->name; 729 730#ifdef HAVE_TSEARCH 731 /* 732 * If we are using tsearch(), make a local copy of the data 733 * so we can trim it without interfering with tsearch's 734 * notion of the ordering of data. That will create some 735 * spurious empty data, so we add the changed() macro in a 736 * few places to skip over those. 
737 */ 738 if (use_tsearch) { 739 char *trim = new_string(target); 740 trim[length] = EOS; 741 *datap = add_tsearch_data(trim, (*datap)->base); 742 target = (*datap)->name; 743 free(trim); 744 *localp = 1; 745 } else 746#endif 747 target[length] = EOS; 748 749 return target; 750} 751 752/* 753 * The 'data' parameter points to the first of two markers, while 754 * 'path' is the pathname from the second marker. 755 * 756 * On the first call for 757 * a given file, the 'data' parameter stores no differences. 758 */ 759static char * 760do_merging(DATA * data, char *path, int *freed) 761{ 762 char *target = reverse_opt ? path : data->name; 763 char *source = reverse_opt ? data->name : path; 764 char *result = source; 765 766 TRACE(("** do_merging(\"%s\",\"%s\") diffs:%d\n", 767 data->name, path, HadDiffs(data))); 768 769 *freed = 0; 770 if (!HadDiffs(data)) { 771 772 if (is_leaf(target, source)) { 773 TRACE(("** is_leaf: \"%s\" vs \"%s\"\n", target, source)); 774 if (reverse_opt) { 775 TRACE((".. no action @%d\n", __LINE__)); 776 } else { 777 *freed = delink(data); 778 } 779 } else if (can_be_merged(target) 780 && can_be_merged(source)) { 781 size_t len1 = strlen(target); 782 size_t len2 = strlen(source); 783 size_t n; 784 int matched = 0; 785 int diff = 0; 786 int local = 0; 787 788 /* 789 * If the source/target differ only by some suffix, e.g., ".orig" 790 * or ".bak", strip that off. The target may may also be a 791 * temporary filename (which would not be merged since it has no 792 * apparent relationship to the current). 793 */ 794 if (len1 > len2) { 795 if (!strncmp(target, source, len2)) { 796 TRACE(("** trimming data \"%s\" to \"%.*s\"\n", 797 target, (int) len2, target)); 798 if (reverse_opt) { 799 TRACE((".. 
no action @%d\n", __LINE__)); 800 } else { 801 target = trim_datapath(&data, len1 = len2, &local); 802 } 803 } 804 } else if (len1 < len2) { 805 if (!strncmp(target, source, len1)) { 806 TRACE(("** trimming source \"%s\" to \"%.*s\"\n", 807 source, (int) len1, source)); 808 if (reverse_opt) { 809 TRACE((".. no action @%d\n", __LINE__)); 810 } else { 811 source[len2 = len1] = EOS; 812 } 813 } 814 } 815 816 /* 817 * If there was no "-p" option, look for the best match by 818 * stripping prefixes from both source/target strings. 819 */ 820 if (prefix_opt < 0) { 821 /* 822 * Now (whether or not we trimmed a suffix), scan back from the 823 * end of source/target strings to find if they happen to share 824 * a common ending, e.g., a/b/c versus d/b/c. If the strings 825 * are not identical, then 'diff' will be set, but if they have 826 * a common ending then 'matched' will be set. 827 */ 828 for (n = 1; n <= len1 && n <= len2; n++) { 829 if (target[len1 - n] != source[len2 - n]) { 830 diff = (int) n; 831 break; 832 } 833 if (source[len2 - n] == PATHSEP) { 834 matched = (int) n; 835 } 836 } 837 838 TRACE(("** merge @%d, prefix_opt=%d matched=%d diff=%d\n", 839 __LINE__, prefix_opt, matched, diff)); 840 if (matched != 0 && diff) { 841 if (reverse_opt) { 842 TRACE((".. no action @%d\n", __LINE__)); 843 } else { 844 result = source + ((int) len2 - matched + 1); 845 } 846 } 847 } 848 849 if (!local) { 850 if (reverse_opt) { 851 TRACE((".. no action @%d\n", __LINE__)); 852 } else { 853 *freed = delink(data); 854 } 855 } 856 } else if (reverse_opt) { 857 TRACE((".. 
no action @%d\n", __LINE__)); 858 if (can_be_merged(source)) { 859 TRACE(("** merge @%d\n", __LINE__)); 860 } else { 861 TRACE(("** do not merge, retain @%d\n", __LINE__)); 862 /* must not merge, retain existing name */ 863 result = target; 864 } 865 } else { 866 if (can_be_merged(source)) { 867 TRACE(("** merge @%d\n", __LINE__)); 868 *freed = delink(data); 869 } else { 870 TRACE(("** do not merge, retain @%d\n", __LINE__)); 871 /* must not merge, retain existing name */ 872 result = target; 873 } 874 } 875 } else if (reverse_opt) { 876 TRACE((".. no action @%d\n", __LINE__)); 877 if (can_be_merged(source)) { 878 TRACE(("** merge @%d\n", __LINE__)); 879 result = target; 880 } else { 881 TRACE(("** do not merge, retain @%d\n", __LINE__)); 882 } 883 } else { 884 if (can_be_merged(source)) { 885 TRACE(("** merge @%d\n", __LINE__)); 886 } else { 887 TRACE(("** do not merge, retain @%d\n", __LINE__)); 888 result = target; 889 } 890 } 891 TRACE(("** finish do_merging ->\"%s\"\n", result)); 892 return result; 893} 894 895static int 896begin_data(const DATA * p) 897{ 898 if (!can_be_merged(p->name) 899 && strchr(p->name, PATHSEP) != 0) { 900 TRACE(("** begin_data:HAVE_PATH\n")); 901 return HAVE_PATH; 902 } 903 TRACE(("** begin_data:HAVE_GENERIC\n")); 904 return HAVE_GENERIC; 905} 906 907static char * 908skip_blanks(char *s) 909{ 910 while (isspace(UC(*s))) 911 ++s; 912 return s; 913} 914 915/* 916 * Skip a filename, which may be in quotes, to allow embedded blanks in the 917 * name. 
918 */ 919static char * 920skip_filename(char *s) 921{ 922 if (*s == SQUOTE && s[1] != EOS && strchr(s + 1, SQUOTE)) { 923 ++s; 924 while (*s != EOS && (*s != SQUOTE) && isgraph(UC(*s))) { 925 ++s; 926 } 927 ++s; 928 } else { 929 while (*s != EOS && isgraph(UC(*s))) { 930 ++s; 931 } 932 } 933 return s; 934} 935 936static char * 937skip_options(char *params) 938{ 939 while (*params != EOS) { 940 params = skip_blanks(params); 941 if (*params == '-') { 942 while (isgraph(UC(*params))) 943 params++; 944 } else { 945 break; 946 } 947 } 948 return skip_blanks(params); 949} 950 951/* 952 * Strip single-quotes from a name (needed for recent makepatch versions). 953 */ 954static void 955dequote(char *s) 956{ 957 size_t len = strlen(s); 958 int n; 959 960 if (*s == SQUOTE && len > 2 && s[len - 1] == SQUOTE) { 961 for (n = 0; (s[n] = s[n + 1]) != EOS; ++n) { 962 ; 963 } 964 s[len - 2] = EOS; 965 } 966} 967 968/* 969 * Allocate a fixed-buffer 970 */ 971static void 972fixed_buffer(char **buffer, size_t want) 973{ 974 *buffer = (char *) xmalloc(want); 975} 976 977/* 978 * Reallocate a fixed-buffer 979 */ 980static void 981adjust_buffer(char **buffer, size_t want) 982{ 983 if ((*buffer = (char *) realloc(*buffer, want)) == 0) 984 failed("realloc"); 985} 986 987/* 988 * Read until newline or end-of-file, allocating the line-buffer so it is long 989 * enough for the input. 990 */ 991static int 992get_line(char **buffer, size_t *have, FILE *fp) 993{ 994 int ch; 995 size_t used = 0; 996 997 while ((ch = MY_GETC(fp)) != EOF) { 998 if (used + 2 > *have) { 999 adjust_buffer(buffer, *have *= 2); 1000 } 1001 (*buffer)[used++] = (char) ch; 1002 if (ch == '\n') 1003 break; 1004 } 1005 (*buffer)[used] = EOS; 1006 return (used != 0); 1007} 1008 1009static char * 1010data_filename(const DATA * p) 1011{ 1012 return (p->name + (prefix_opt >= 0 ? p->base : prefix_len)); 1013} 1014 1015/* 1016 * Count the (new)lines in a file, return -1 if the file is not found. 
1017 */ 1018static int 1019count_lines(DATA * p) 1020{ 1021 int result = -1; 1022 char *filename = 0; 1023 char *filetail = data_filename(p); 1024 size_t want = strlen(path_opt) + 2 + strlen(filetail); 1025 FILE *fp; 1026 int ch; 1027 1028 if ((filename = malloc(want)) != 0) { 1029 int merge = 0; 1030 1031 if (path_dest) { 1032 size_t path_len = strlen(path_opt); 1033 size_t tail_len; 1034 char *tail_sep = strchr(filetail, PATHSEP); 1035 1036 if (tail_sep != 0) { 1037 tail_len = (size_t) (tail_sep - filetail); 1038 if (tail_len != 0 && tail_len <= path_len) { 1039 if (tail_len < path_len 1040 && path_opt[path_len - tail_len - 1] != PATHSEP) { 1041 merge = 0; 1042 } else if (!strncmp(path_opt + path_len - tail_len, 1043 filetail, 1044 tail_len - 1)) { 1045 merge = 1; 1046 if (path_len > tail_len) { 1047 sprintf(filename, "%.*s%c%s", 1048 (int) (path_len - tail_len), 1049 path_opt, 1050 PATHSEP, 1051 filetail); 1052 } else { 1053 strcpy(filename, filetail); 1054 } 1055 } 1056 } 1057 } 1058 } 1059 if (!merge) { 1060 sprintf(filename, "%s%c%s", path_opt, PATHSEP, filetail); 1061 } 1062 1063 TRACE(("count_lines %s\n", filename)); 1064 if ((fp = fopen(filename, "r")) != 0) { 1065 result = 0; 1066 while ((ch = MY_GETC(fp)) != EOF) { 1067 if (ch == '\n') 1068 ++result; 1069 } 1070 fclose(fp); 1071 } else { 1072 fprintf(stderr, "Cannot open %s\n", filename); 1073 } 1074 free(filename); 1075 } else { 1076 failed("count_lines"); 1077 } 1078 return result; 1079} 1080 1081static void 1082update_chunk(DATA * p, Change change) 1083{ 1084 if (merge_opt) { 1085 p->pending += 1; 1086 p->chunk[change] += 1; 1087 } else { 1088 p->count[change] += 1; 1089 } 1090} 1091 1092static void 1093finish_chunk(DATA * p) 1094{ 1095 int i; 1096 1097 if (p->pending) { 1098 p->pending = 0; 1099 p->chunks += 1; 1100 if (merge_opt) { 1101 /* 1102 * This is crude, but to make it really precise we would have 1103 * to keep an array of line-numbers to which which in a chunk 1104 * are marked as 
insert/delete. 1105 */ 1106 if (p->chunk[cInsert] && p->chunk[cDelete]) { 1107 long change; 1108 if (p->chunk[cInsert] > p->chunk[cDelete]) { 1109 change = p->chunk[cDelete]; 1110 } else { 1111 change = p->chunk[cInsert]; 1112 } 1113 p->chunk[cInsert] -= change; 1114 p->chunk[cDelete] -= change; 1115 p->chunk[cModify] += change; 1116 } 1117 } 1118 for_each_mark(i) { 1119 p->count[i] += p->chunk[i]; 1120 p->chunk[i] = 0; 1121 } 1122 } 1123} 1124 1125#define date_delims(a,b) (((a)=='/' && (b)=='/') || ((a) == '-' && (b) == '-')) 1126#define CASE_TRACE() TRACE(("** handle case for '%c' %d:%s\n", *buffer, ok, that ? that->name : "")) 1127 1128static void 1129do_file(FILE *fp, const char *default_name) 1130{ 1131 static const char *only_stars = "***************"; 1132 1133 DATA dummy; 1134 DATA *that = &dummy; 1135 DATA *prev = 0; 1136 char *buffer = 0; 1137 char *b_fname = 0; 1138 char *b_temp1 = 0; 1139 char *b_temp2 = 0; 1140 char *b_temp3 = 0; 1141 size_t length = 0; 1142 size_t fixed = 0; 1143 int ok = HAVE_NOTHING; 1144 int marker; 1145 int freed = 0; 1146 1147 int unified = 0; 1148 int old_unify = 0; 1149 int new_unify = 0; 1150 int expect_unify = 0; 1151 1152 long old_dft = 0; 1153 long new_dft = 0; 1154 1155 int context = 1; 1156 1157 char *s; 1158#if OPT_TRACE 1159 int line_no = 0; 1160#endif 1161 1162 init_data(&dummy, "", 1, 0); 1163 1164 fixed_buffer(&buffer, fixed = length = BUFSIZ); 1165 fixed_buffer(&b_fname, length); 1166 fixed_buffer(&b_temp1, length); 1167 fixed_buffer(&b_temp2, length); 1168 fixed_buffer(&b_temp3, length); 1169 1170 while (get_line(&buffer, &length, fp)) { 1171 /* 1172 * Adjust size of fixed-buffers so that a sscanf cannot overflow. 1173 */ 1174 if (length > fixed) { 1175 fixed = length; 1176 adjust_buffer(&b_fname, length); 1177 adjust_buffer(&b_temp1, length); 1178 adjust_buffer(&b_temp2, length); 1179 adjust_buffer(&b_temp3, length); 1180 } 1181 1182 /* 1183 * Trim trailing newline. 
1184 */ 1185 for (s = buffer + strlen(buffer); s > buffer; s--) { 1186 if ((UC(s[-1]) == '\n') || (UC(s[-1]) == '\r')) 1187 s[-1] = EOS; 1188 else 1189 break; 1190 } 1191 ++line_no; 1192 TRACE(("[%05d] %s\n", line_no, buffer)); 1193 1194 /* 1195 * "patch -U" can create ".rej" files lacking a filename header, 1196 * in unified format. Check for those. 1197 */ 1198 if (line_no == 1 && !strncmp(buffer, "@@", (size_t) 2)) { 1199 unified = 2; 1200 that = find_data(default_name); 1201 ok = begin_data(that); 1202 } 1203 1204 /* 1205 * The lines identifying files in a context diff depend on how it was 1206 * invoked. But after the header, each chunk begins with a line 1207 * containing 15 *'s. Each chunk may contain a line-range with '***' 1208 * for the "before", and a line-range with '---' for the "after". The 1209 * part of the chunk depicting the deletion may be absent, though the 1210 * edit line is present. 1211 * 1212 * The markers for unified diff are a little different from the normal 1213 * context-diff. Also, the edit-lines in a unified diff won't have a 1214 * space in column 2. Because of the missing space, we have to count 1215 * lines to ensure we do not confuse the marker lines. 
1216 */ 1217 marker = 0; 1218 if (that != &dummy && !strcmp(buffer, only_stars)) { 1219 finish_chunk(that); 1220 TRACE(("** begin context chunk\n")); 1221 context = 2; 1222 } else if (line_no == 1 && !strcmp(buffer, only_stars)) { 1223 TRACE(("** begin context chunk\n")); 1224 context = 2; 1225 that = find_data(default_name); 1226 ok = begin_data(that); 1227 } else if (context == 2 && match(buffer, "*** ")) { 1228 context = 1; 1229 } else if (context == 1 && match(buffer, "--- ")) { 1230 marker = 1; 1231 context = 0; 1232 } else if (match(buffer, "*** ")) { 1233 } else if ((old_unify + new_unify) == 0 && match(buffer, "==== ")) { 1234 finish_chunk(that); 1235 unified = 2; 1236 } else if ((old_unify + new_unify) == 0 && match(buffer, "--- ")) { 1237 finish_chunk(that); 1238 marker = unified = 1; 1239 } else if ((old_unify + new_unify) == 0 && match(buffer, "+++ ")) { 1240 marker = unified = 2; 1241 } else if (unified == 2 1242 || ((old_unify + new_unify) == 0 && (*buffer == '@'))) { 1243 finish_chunk(that); 1244 unified = 0; 1245 if (*buffer == '@') { 1246 int old_base, new_base, old_size, new_size; 1247 char test_at; 1248 1249 old_unify = new_unify = 0; 1250 if (sscanf(buffer, "@@ -%[0-9,] +%[0-9,] @%c", 1251 b_temp1, 1252 b_temp2, 1253 &test_at) == 3 1254 && test_at == '@' 1255 && decode_range(b_temp1, &old_base, &old_size) 1256 && decode_range(b_temp2, &new_base, &new_size)) { 1257 old_unify = old_size; 1258 new_unify = new_size; 1259 unified = -1; 1260 } 1261 } 1262 } else if (unified == 1 && !context) { 1263 /* 1264 * If unified==1, we guessed we would find a "+++" line, but since 1265 * we are here, we did not find that. The context check ensures 1266 * we do not mistake the "---" for a unified diff with that for 1267 * a context diff's "after" line-range. 1268 * 1269 * If we guessed wrong, then we probably found a data line with 1270 * "--" in the first two columns of the diff'd file. 1271 */ 1272 unified = 0; 1273 TRACE(("?? 
Expected \"+++\" for unified diff\n")); 1274 if (prev != 0 1275 && prev != that 1276 && InsOf(that) == 0 1277 && DelOf(that) == 0 1278 && strcmp(prev->name, that->name)) { 1279 TRACE(("?? giveup on %ld/%ld %s\n", InsOf(that), 1280 DelOf(that), that->name)); 1281 TRACE(("?? revert to %ld/%ld %s\n", InsOf(prev), 1282 DelOf(prev), prev->name)); 1283 (void) delink(that); 1284 that = prev; 1285 update_chunk(that, cDelete); 1286 } 1287 } else if (old_unify + new_unify) { 1288 switch (*buffer) { 1289 case '-': 1290 if (old_unify) 1291 --old_unify; 1292 break; 1293 case '+': 1294 if (new_unify) 1295 --new_unify; 1296 break; 1297 case EOS: 1298 case ' ': 1299 if (old_unify) 1300 --old_unify; 1301 if (new_unify) 1302 --new_unify; 1303 break; 1304 case '\\': 1305 if (strstr(buffer, "newline") != 0) { 1306 break; 1307 } 1308 /* FALLTHRU */ 1309 default: 1310 TRACE(("?? expected more in chunk\n")); 1311 old_unify = new_unify = 0; 1312 break; 1313 } 1314 if (!(old_unify + new_unify)) { 1315 expect_unify = 2; 1316 } 1317 } else { 1318 long old_base, new_base; 1319 1320 unified = 0; 1321 1322 if (line_no == 1 1323 && decode_default(buffer, 1324 &old_base, &old_dft, 1325 &new_base, &new_dft)) { 1326 TRACE(("DFT %ld,%ld -> %ld,%ld\n", 1327 old_base, old_base + old_dft - 1, 1328 new_base, new_base + new_dft - 1)); 1329 finish_chunk(that); 1330 that = find_data("unknown"); 1331 ok = begin_data(that); 1332 } 1333 } 1334 1335 /* 1336 * If the previous line ended a chunk of a unified diff, we may begin 1337 * another chunk, or begin another type of diff. If neither, do not 1338 * continue to accumulate counts for the unified diff which has ended. 1339 */ 1340 if (expect_unify != 0) { 1341 if (expect_unify-- == 1) { 1342 if (unified == 0) { 1343 TRACE(("?? did not get chunk\n")); 1344 finish_chunk(that); 1345 that = &dummy; 1346 } 1347 } 1348 } 1349 1350 /* 1351 * Override the beginning of the line to simplify the case statement 1352 * below. 
1353 */ 1354 if (marker > 0) { 1355 TRACE(("** have marker=%d, override %s\n", marker, buffer)); 1356 (void) strncpy(buffer, "***", (size_t) 3); 1357 } 1358 1359 /* 1360 * Use the first character of the input line to determine its 1361 * type: 1362 */ 1363 switch (*buffer) { 1364 case 'O': /* Only */ 1365 CASE_TRACE(); 1366 if (match(buffer, "Only in ")) { 1367 char *path = buffer + 8; 1368 int found = 0; 1369 for (s = path; *s != EOS; s++) { 1370 if (match(s, ": ")) { 1371 found = 1; 1372 *s++ = PATHSEP; 1373 while ((s[0] = s[1]) != EOS) 1374 s++; 1375 break; 1376 } 1377 } 1378 if (found) { 1379 blip('.'); 1380 finish_chunk(that); 1381 that = find_data(path); 1382 that->cmt = Only; 1383 ok = HAVE_NOTHING; 1384 } 1385 } 1386 break; 1387 1388 /* 1389 * Several different scripts produce "Index:" lines 1390 * (e.g., "makepatch"). Not all bother to put the 1391 * pathname of the files; some put only the leaf names. 1392 */ 1393 case 'I': 1394 CASE_TRACE(); 1395 if ((s = match(buffer, "Index: ")) != 0) { 1396 s = skip_blanks(s); 1397 dequote(s); 1398 blip('.'); 1399 finish_chunk(that); 1400 s = do_merging(that, s, &freed); 1401 that = find_data(s); 1402 ok = begin_data(that); 1403 } 1404 break; 1405 1406 case 'd': /* diff command trace */ 1407 CASE_TRACE(); 1408 if ((s = match(buffer, "diff ")) != 0 1409 && *(s = skip_options(s)) != EOS) { 1410 if (reverse_opt) { 1411 *skip_filename(s) = EOS; 1412 } else { 1413 s = skip_filename(s); 1414 s = skip_blanks(s); 1415 } 1416 dequote(s); 1417 blip('.'); 1418 finish_chunk(that); 1419 s = do_merging(that, s, &freed); 1420 that = find_data(s); 1421 ok = begin_data(that); 1422 } 1423 break; 1424 1425 case '*': 1426 CASE_TRACE(); 1427 if (!(ok & HAVE_PATH)) { 1428 int ddd, hour, minute, second; 1429 int day, month, year; 1430 char yrmon, monday; 1431 1432 /* check for tab-delimited first, so we can 1433 * accept filenames containing spaces. 
1434 */ 1435 if (sscanf(buffer, 1436 "*** %[^\t]\t%[^ ] %[^ ] %d %d:%d:%d %d", 1437 b_fname, 1438 b_temp2, b_temp3, &ddd, 1439 &hour, &minute, &second, &year) == 8 1440 || (sscanf(buffer, 1441 "*** %[^\t]\t%d%c%d%c%d %d:%d:%d", 1442 b_fname, 1443 &year, &yrmon, &month, &monday, &day, 1444 &hour, &minute, &second) == 9 1445 && date_delims(yrmon, monday) 1446 && !version_num(b_fname)) 1447 || sscanf(buffer, 1448 "*** %[^\t ]%[\t ]%[^ ] %[^ ] %d %d:%d:%d %d", 1449 b_fname, 1450 b_temp1, 1451 b_temp2, b_temp3, &ddd, 1452 &hour, &minute, &second, &year) == 9 1453 || (sscanf(buffer, 1454 "*** %[^\t ]%[\t ]%d%c%d%c%d %d:%d:%d", 1455 b_fname, 1456 b_temp1, 1457 &year, &yrmon, &month, &monday, &day, 1458 &hour, &minute, &second) == 10 1459 && date_delims(yrmon, monday) 1460 && !version_num(b_fname)) 1461 || (sscanf(buffer, 1462 "*** %[^\t ]%[\t ]", 1463 b_fname, 1464 b_temp1) >= 1 1465 && !version_num(b_fname) 1466 && !contain_any(b_fname, "*") 1467 && !edit_range(b_fname)) 1468 ) { 1469 prev = that; 1470 finish_chunk(that); 1471 s = do_merging(that, b_fname, &freed); 1472 if (freed) 1473 prev = 0; 1474 that = find_data(s); 1475 ok = begin_data(that); 1476 TRACE(("** after merge:%d:%s\n", ok, s)); 1477 } 1478 } 1479 break; 1480 1481 case '=': 1482 CASE_TRACE(); 1483 if (!(ok & HAVE_PATH)) { 1484 int rev; 1485 1486 if (((sscanf(buffer, 1487 "==== %[^\t #]#%d - %[^\t ]", 1488 b_fname, 1489 &rev, 1490 b_temp1) == 3) 1491 || ((sscanf(buffer, 1492 "==== %[^\t #]#%d (%[^)]) - %[^\t ]", 1493 b_fname, 1494 &rev, 1495 b_temp1, 1496 b_temp2) == 4))) 1497 && !version_num(b_fname) 1498 && !contain_any(b_fname, "*") 1499 && !edit_range(b_fname)) { 1500 TRACE(("** found p4-diff\n")); 1501 prev = that; 1502 finish_chunk(that); 1503 s = do_merging(that, b_fname, &freed); 1504 if (freed) 1505 prev = 0; 1506 that = find_data(s); 1507 ok = begin_data(that); 1508 TRACE(("** after merge:%d:%s\n", ok, s)); 1509 } 1510 } 1511 break; 1512 1513 case '+': 1514 /* FALL-THRU */ 1515 case '>': 1516 
CASE_TRACE(); 1517 if (ok) { 1518 update_chunk(that, cInsert); 1519 } 1520 break; 1521 1522 case '-': 1523 if (!ok) { 1524 CASE_TRACE(); 1525 break; 1526 } 1527 if (!unified && !strcmp(buffer, "---")) { 1528 CASE_TRACE(); 1529 break; 1530 } 1531 /* fall-thru */ 1532 case '<': 1533 CASE_TRACE(); 1534 if (ok) { 1535 update_chunk(that, cDelete); 1536 } 1537 break; 1538 1539 case '!': 1540 CASE_TRACE(); 1541 if (ok) { 1542 update_chunk(that, cModify); 1543 } 1544 break; 1545 1546 /* Expecting "Binary files XXX and YYY differ" */ 1547 case 'B': /* Binary */ 1548 /* FALL-THRU */ 1549 case 'b': /* binary */ 1550 CASE_TRACE(); 1551 if ((s = match(buffer + 1, "inary files ")) != 0) { 1552 char *first = skip_blanks(s); 1553 /* blindly assume the first filename does not contain " and " */ 1554 char *at_and = strstr(s, " and "); 1555 s = strrchr(buffer, BLANK); 1556 if ((at_and != NULL) && !strcmp(s, " differ")) { 1557 char *second = skip_blanks(at_and + 5); 1558 1559 if (reverse_opt) { 1560 *at_and = EOS; 1561 s = first; 1562 } else { 1563 *s = EOS; 1564 s = second; 1565 } 1566 blip('.'); 1567 finish_chunk(that); 1568 that = find_data(s); 1569 that->cmt = Binary; 1570 ok = HAVE_NOTHING; 1571 } 1572 } 1573 break; 1574 } 1575 } 1576 blip('\n'); 1577 1578 finish_chunk(that); 1579 finish_chunk(&dummy); 1580 if (buffer != 0) { 1581 free(buffer); 1582 free(b_fname); 1583 free(b_temp1); 1584 free(b_temp2); 1585 free(b_temp3); 1586 } 1587} 1588 1589static void 1590show_color(int color) 1591{ 1592 if (color >= 0) 1593 printf("\033[%dm", color + 30); 1594 else 1595 printf("\033[0;39m"); 1596} 1597 1598static long 1599plot_bar(long count, int c, int color) 1600{ 1601 long result = count; 1602 1603 if (show_colors && result != 0) 1604 show_color(color); 1605 1606 while (--count >= 0) 1607 (void) putchar(c); 1608 1609 if (show_colors && result != 0) 1610 show_color(-1); 1611 1612 return result; 1613} 1614 1615/* 1616 * Each call to 'plot_num()' prints a scaled bar of 'c' characters. 
The 1617 * 'extra' parameter is used to keep the accumulated error in the bar's total 1618 * length from getting large. 1619 */ 1620static long 1621plot_num(long num_value, int c, int color, long *extra) 1622{ 1623 long product; 1624 long result = 0; 1625 1626 /* the value to plot */ 1627 /* character to display in the bar */ 1628 /* accumulated error in the bar */ 1629 if (num_value) { 1630 product = (plot_width * num_value); 1631 result = ((product + *extra) / plot_scale); 1632 *extra = product - (result * plot_scale) - *extra; 1633 plot_bar(result, c, color); 1634 } 1635 return result; 1636} 1637 1638static long 1639plot_round1(const long num[MARKS]) 1640{ 1641 long result = 0; 1642 long scaled[MARKS]; 1643 long remain[MARKS]; 1644 long want = 0; 1645 long have = 0; 1646 long half = (plot_scale / 2); 1647 int i, j; 1648 1649 for_each_mark(i) { 1650 long product = (plot_width * num[i]); 1651 scaled[i] = (product / plot_scale); 1652 remain[i] = (product % plot_scale); 1653 want += product; 1654 have += product - remain[i]; 1655 } 1656 while (want > have) { 1657 j = -1; 1658 for_each_mark(i) { 1659 if (remain[i] != 0 1660 && (remain[i] > (j >= 0 ? remain[j] : half))) { 1661 j = i; 1662 } 1663 } 1664 if (j >= 0) { 1665 have += remain[j]; 1666 remain[j] = 0; 1667 scaled[j] += 1; 1668 } else { 1669 break; 1670 } 1671 } 1672 for_each_mark(i) { 1673 plot_bar(scaled[i], marks[i], colors[i]); 1674 result += scaled[i]; 1675 } 1676 return result; 1677} 1678 1679/* 1680 * Print a scaled bar of characters, where c[0] is for insertions, c[1] 1681 * for deletions and c[2] for modifications. The num array contains the 1682 * count for each type of change, in the same order. 
1683 */ 1684static long 1685plot_round2(const long num[MARKS]) 1686{ 1687 long result = 0; 1688 long scaled[MARKS]; 1689 long remain[MARKS]; 1690 long total = 0; 1691 int i; 1692 1693 for (i = 0; i < MARKS; i++) 1694 total += num[i]; 1695 1696 if (total == 0) 1697 return result; 1698 1699 total = (total * plot_width + (plot_scale / 2)) / plot_scale; 1700 /* display at least one character */ 1701 if (total == 0) 1702 total++; 1703 1704 for_each_mark(i) { 1705 scaled[i] = num[i] * plot_width / plot_scale; 1706 remain[i] = num[i] * plot_width - scaled[i] * plot_scale; 1707 total -= scaled[i]; 1708 } 1709 1710 /* assign the missing chars using the largest remainder algo */ 1711 while (total) { 1712 int largest, largest_count; /* largest is a bit field */ 1713 long max_remain; 1714 1715 /* search for the largest remainder */ 1716 largest = largest_count = 0; 1717 max_remain = 0; 1718 for_each_mark(i) { 1719 if (remain[i] > max_remain) { 1720 largest = 1 << i; 1721 largest_count = 1; 1722 max_remain = remain[i]; 1723 } else if (remain[i] == max_remain) { /* ex aequo */ 1724 largest |= 1 << i; 1725 largest_count++; 1726 } 1727 } 1728 1729 /* if there are more greatest remainders than characters 1730 missing, don't assign them at all */ 1731 if (total < largest_count) 1732 break; 1733 1734 /* allocate the extra characters */ 1735 for_each_mark(i) { 1736 if (largest & (1 << i)) { 1737 scaled[i]++; 1738 total--; 1739 remain[i] -= plot_width; 1740 } 1741 } 1742 } 1743 1744 for_each_mark(i) { 1745 result += plot_bar(scaled[i], marks[i], colors[i]); 1746 } 1747 1748 return result; 1749} 1750 1751static void 1752plot_numbers(const DATA * p) 1753{ 1754 long temp = 0; 1755 long used = 0; 1756 int i; 1757 1758 printf("%5ld ", TotalOf(p)); 1759 1760 if (format_opt & FMT_VERBOSE) { 1761 printf("%5ld ", InsOf(p)); 1762 printf("%5ld ", DelOf(p)); 1763 printf("%5ld ", ModOf(p)); 1764 if (path_opt) 1765 printf("%5ld ", EqlOf(p)); 1766 } 1767 1768 if (format_opt == FMT_CONCISE) { 1769 
for_each_mark(i) { 1770 printf("\t%ld %c", p->count[i], marks[i]); 1771 } 1772 } else { 1773 switch (round_opt) { 1774 default: 1775 for_each_mark(i) { 1776 used += plot_num(p->count[i], marks[i], colors[i], &temp); 1777 } 1778 break; 1779 case 1: 1780 used = plot_round1(p->count); 1781 break; 1782 1783 case 2: 1784 used = plot_round2(p->count); 1785 break; 1786 } 1787 1788 if ((format_opt & FMT_FILLED) != 0) { 1789 if (used > plot_width) 1790 printf("%ld", used - plot_width); /* oops */ 1791 else 1792 plot_bar(plot_width - used, '.', 0); 1793 } 1794 } 1795} 1796 1797#define changed(p) (!merge_names \ 1798 || (p)->cmt != Normal \ 1799 || (TotalOf(p)) != 0) 1800 1801static void 1802show_data(const DATA * p) 1803{ 1804 char *name = data_filename(p); 1805 int width; 1806 1807 if (summary_only) { 1808 ; 1809 } else if (!changed(p)) { 1810 ; 1811 } else if (p->cmt == Binary && suppress_binary == 1) { 1812 ; 1813 } else if (table_opt) { 1814 if (names_only) { 1815 printf("%s\n", name); 1816 } else { 1817 printf("%ld,%ld,%ld,", 1818 InsOf(p), 1819 DelOf(p), 1820 ModOf(p)); 1821 if (path_opt) 1822 printf("%ld,", EqlOf(p)); 1823 printf("%s\n", name); 1824 } 1825 } else if (names_only) { 1826 printf("%s\n", name); 1827 } else { 1828 printf("%s ", comment_opt); 1829 if (max_name_wide > 0 1830 && max_name_wide < min_name_wide 1831 && max_name_wide < ((width = (int) strlen(name)))) { 1832 printf("%.*s", max_name_wide, name + (width - max_name_wide)); 1833 } else { 1834 width = ((max_name_wide > 0 && max_name_wide < min_name_wide) 1835 ? 
max_name_wide 1836 : min_name_wide); 1837 printf("%-*.*s", width, width, name); 1838 } 1839 putchar('|'); 1840 switch (p->cmt) { 1841 default: 1842 case Normal: 1843 plot_numbers(p); 1844 break; 1845 case Binary: 1846 printf("binary"); 1847 break; 1848 case Only: 1849 printf("only"); 1850 break; 1851 } 1852 printf("\n"); 1853 } 1854} 1855 1856#ifdef HAVE_TSEARCH 1857static void 1858show_tsearch(const void *nodep, const VISIT which, const int depth) 1859{ 1860 const DATA *p = *(DATA * const *) nodep; 1861 (void) depth; 1862 if (which == postorder || which == leaf) 1863 show_data(p); 1864} 1865#endif 1866 1867static int 1868ignore_data(DATA * p) 1869{ 1870 return ((!changed(p)) 1871 || (p->cmt == Binary && suppress_binary)); 1872} 1873 1874static void 1875summarize(void) 1876{ 1877 DATA *p; 1878 long total_ins = 0; 1879 long total_del = 0; 1880 long total_mod = 0; 1881 long total_eql = 0; 1882 long temp; 1883 int num_files = 0, shortest_name = -1, longest_name = -1; 1884 1885 plot_scale = 0; 1886 for (p = all_data; p; p = p->link) { 1887 int len = (int) strlen(p->name); 1888 1889 if (ignore_data(p)) 1890 continue; 1891 1892 /* 1893 * If "-pX" option is given, prefix_opt is positive. 1894 * 1895 * "-p0" gives the whole pathname unmodified. "-p1" strips 1896 * through the first path-separator, etc. 1897 */ 1898 if (prefix_opt >= 0) { 1899 /* p->base has been computed at node creation */ 1900 if (min_name_wide < (len - p->base)) 1901 min_name_wide = (len - p->base); 1902 } else { 1903 /* 1904 * If "-pX" option is not given, strip off any prefix which is 1905 * shared by all of the names. 
1906 */ 1907 if (len < prefix_len || prefix_len < 0) 1908 prefix_len = len; 1909 while (prefix_len > 0) { 1910 if (p->name[prefix_len - 1] != PATHSEP) 1911 prefix_len--; 1912 else if (strncmp(all_data->name, p->name, (size_t) prefix_len)) 1913 prefix_len--; 1914 else 1915 break; 1916 } 1917 1918 if (len > longest_name) 1919 longest_name = len; 1920 if (len < shortest_name || shortest_name < 0) 1921 shortest_name = len; 1922 } 1923 } 1924 1925 /* 1926 * Use a separate loop after computing prefix_len so we can apply the "-S" 1927 * or "-D" options to find files that we can use as reference for the 1928 * unchanged-count. 1929 */ 1930 for (p = all_data; p; p = p->link) { 1931 if (!ignore_data(p)) { 1932 EqlOf(p) = 0; 1933 if (reverse_opt) { 1934 int save_ins = InsOf(p); 1935 int save_del = DelOf(p); 1936 InsOf(p) = save_del; 1937 DelOf(p) = save_ins; 1938 } 1939 if (path_opt != 0) { 1940 int count = count_lines(p); 1941 1942 if (count >= 0) { 1943 EqlOf(p) = count - ModOf(p); 1944 if (path_dest != 0) { 1945 EqlOf(p) -= InsOf(p); 1946 } else { 1947 EqlOf(p) -= DelOf(p); 1948 } 1949 if (EqlOf(p) < 0) 1950 EqlOf(p) = 0; 1951 } 1952 } 1953 num_files++; 1954 total_ins += InsOf(p); 1955 total_del += DelOf(p); 1956 total_mod += ModOf(p); 1957 total_eql += EqlOf(p); 1958 temp = TotalOf(p); 1959 if (temp > plot_scale) 1960 plot_scale = temp; 1961 } 1962 } 1963 1964 if (prefix_opt < 0) { 1965 if (prefix_len < 0) 1966 prefix_len = 0; 1967 if ((longest_name - prefix_len) > min_name_wide) 1968 min_name_wide = (longest_name - prefix_len); 1969 } 1970 1971 min_name_wide++; /* make sure it's nonzero */ 1972 plot_width = (max_width - min_name_wide - 8); 1973 if (plot_width < 10) 1974 plot_width = 10; 1975 1976 if (plot_scale < plot_width) 1977 plot_scale = plot_width; /* 1:1 */ 1978 1979 if (table_opt) { 1980 if (!names_only) { 1981 printf("INSERTED,DELETED,MODIFIED,"); 1982 if (path_opt) 1983 printf("UNCHANGED,"); 1984 } 1985 printf("FILENAME\n"); 1986 } 1987#ifdef HAVE_TSEARCH 1988 
if (use_tsearch) { 1989 twalk(sorted_data, show_tsearch); 1990 } else 1991#endif 1992 for (p = all_data; p; p = p->link) { 1993 show_data(p); 1994 } 1995 1996 if (!table_opt && !names_only) { 1997#define PLURAL(n) n, n != 1 ? "s" : "" 1998 if (num_files > 0 || !quiet) { 1999 printf("%s %d file%s changed", comment_opt, PLURAL(num_files)); 2000 if (total_ins) 2001 printf(", %ld insertion%s(+)", PLURAL(total_ins)); 2002 if (total_del) 2003 printf(", %ld deletion%s(-)", PLURAL(total_del)); 2004 if (total_mod) 2005 printf(", %ld modification%s(!)", PLURAL(total_mod)); 2006 if (total_eql && path_opt != 0) 2007 printf(", %ld unchanged line%s(=)", PLURAL(total_eql)); 2008 (void) putchar('\n'); 2009 } 2010 } 2011} 2012 2013#ifdef HAVE_POPEN 2014static const char * 2015get_program(const char *name, const char *dft) 2016{ 2017 const char *result = getenv(name); 2018 if (result == 0 || *result == EOS) 2019 result = dft; 2020 TRACE(("get_program(%s) = %s\n", name, result)); 2021 return result; 2022} 2023#define GET_PROGRAM(name) get_program("DIFFSTAT_" #name, name) 2024 2025static char * 2026decompressor(Decompress which, const char *name) 2027{ 2028 const char *verb = 0; 2029 const char *opts = ""; 2030 char *result = 0; 2031 size_t len = strlen(name); 2032 2033 switch (which) { 2034 case dcBzip: 2035 verb = GET_PROGRAM(BZCAT_PATH); 2036 if (*verb == '\0') { 2037 verb = GET_PROGRAM(BZIP2_PATH); 2038 opts = "-dc"; 2039 } 2040 break; 2041 case dcCompress: 2042 verb = GET_PROGRAM(ZCAT_PATH); 2043 if (*verb == '\0') { 2044 verb = GET_PROGRAM(UNCOMPRESS_PATH); 2045 opts = "-c"; 2046 if (*verb == '\0') { 2047 /* not all compress's recognize the options, test this last */ 2048 verb = GET_PROGRAM(COMPRESS_PATH); 2049 opts = "-dc"; 2050 } 2051 } 2052 break; 2053 case dcGzip: 2054 verb = GET_PROGRAM(GZIP_PATH); 2055 opts = "-dc"; 2056 break; 2057 case dcLzma: 2058 verb = GET_PROGRAM(LZCAT_PATH); 2059 opts = "-dc"; 2060 break; 2061 case dcPack: 2062 verb = GET_PROGRAM(PCAT_PATH); 2063 
break; 2064 case dcXz: 2065 verb = GET_PROGRAM(XZ_PATH); 2066 opts = "-dc"; 2067 break; 2068 case dcEmpty: 2069 /* FALLTHRU */ 2070 case dcNone: 2071 break; 2072 } 2073 if (verb != 0 && *verb != '\0') { 2074 result = (char *) xmalloc(strlen(verb) + 10 + len); 2075 sprintf(result, "%s %s", verb, opts); 2076 if (*name != '\0') { 2077 sprintf(result + strlen(result), " \"%s\"", name); 2078 } 2079 } 2080 return result; 2081} 2082 2083static char * 2084is_compressed(const char *name) 2085{ 2086 size_t len = strlen(name); 2087 Decompress which; 2088 2089 if (len > 2 && !strcmp(name + len - 2, ".Z")) { 2090 which = dcCompress; 2091 } else if (len > 2 && !strcmp(name + len - 2, ".z")) { 2092 which = dcPack; 2093 } else if (len > 3 && !strcmp(name + len - 3, ".gz")) { 2094 which = dcGzip; 2095 } else if (len > 4 && !strcmp(name + len - 4, ".bz2")) { 2096 which = dcBzip; 2097 } else if (len > 5 && !strcmp(name + len - 5, ".lzma")) { 2098 which = dcLzma; 2099 } else if (len > 3 && !strcmp(name + len - 3, ".xz")) { 2100 which = dcXz; 2101 } else { 2102 which = dcNone; 2103 } 2104 return decompressor(which, name); 2105} 2106 2107#ifdef HAVE_MKDTEMP 2108#define MY_MKDTEMP(path) mkdtemp(path) 2109#else 2110/* 2111 * mktemp is supposedly marked obsolete at the same point that mkdtemp is 2112 * introduced. 
2113 */ 2114static char * 2115my_mkdtemp(char *path) 2116{ 2117 char *result = mktemp(path); 2118 if (result != 0) { 2119 if (MKDIR(result, 0700) < 0) { 2120 result = 0; 2121 } 2122 } 2123 return path; 2124} 2125#define MY_MKDTEMP(path) my_mkdtemp(path) 2126#endif 2127 2128static char * 2129copy_stdin(char **dirpath) 2130{ 2131 const char *tmp = getenv("TMPDIR"); 2132 char *result = 0; 2133 int ch; 2134 FILE *fp; 2135 2136 if (tmp == 0) 2137 tmp = "/tmp/"; 2138 *dirpath = xmalloc(strlen(tmp) + 12); 2139 2140 strcpy(*dirpath, tmp); 2141 strcat(*dirpath, "/diffXXXXXX"); 2142 if (MY_MKDTEMP(*dirpath) != 0) { 2143 result = xmalloc(strlen(*dirpath) + 10); 2144 sprintf(result, "%s/stdin", *dirpath); 2145 2146 if ((fp = fopen(result, "w")) != 0) { 2147 while ((ch = MY_GETC(stdin)) != EOF) { 2148 fputc(ch, fp); 2149 } 2150 fclose(fp); 2151 } else { 2152 free(result); 2153 result = 0; 2154 rmdir(*dirpath); /* Assume that the /stdin file was not created */ 2155 free(*dirpath); 2156 *dirpath = 0; 2157 } 2158 } else { 2159 free(*dirpath); 2160 *dirpath = 0; 2161 } 2162 return result; 2163} 2164#endif 2165 2166static void 2167set_path_opt(char *value, int destination) 2168{ 2169 path_opt = value; 2170 path_dest = destination; 2171 if (*path_opt != 0) { 2172 if (is_dir(path_opt)) { 2173 num_marks = 4; 2174 } else { 2175 fprintf(stderr, "Not a directory:%s\n", path_opt); 2176 exit(EXIT_FAILURE); 2177 } 2178 } 2179} 2180 2181static void 2182usage(FILE *fp) 2183{ 2184 static const char *msg[] = 2185 { 2186 "Usage: diffstat [options] [files]", 2187 "", 2188 "Reads from one or more input files which contain output from 'diff',", 2189 "producing a histogram of total lines changed for each file referenced.", 2190 "If no filename is given on the command line, reads from standard input.", 2191 "", 2192 "Options:", 2193 " -c prefix each line with comment (#)", 2194#if OPT_TRACE 2195 " -d debug - prints a lot of information", 2196#endif 2197 " -D PATH specify location of patched files, use 
for unchanged-count", 2198 " -e FILE redirect standard error to FILE", 2199 " -f NUM format (0=concise, 1=normal, 2=filled, 4=values)", 2200 " -h print this message", 2201 " -k do not merge filenames", 2202 " -l list filenames only", 2203 " -m merge insert/delete data in chunks as modified-lines", 2204 " -n NUM specify minimum width for the filenames (default: auto)", 2205 " -N NUM specify maximum width for the filenames (default: auto)", 2206 " -o FILE redirect standard output to FILE", 2207 " -p NUM specify number of pathname-separators to strip (default: common)", 2208 " -q suppress the \"0 files changed\" message for empty diffs", 2209 " -r NUM specify rounding for histogram (0=none, 1=simple, 2=adjusted)", 2210 " -R assume patch was created with old and new files swapped", 2211 " -S PATH specify location of original files, use for unchanged-count", 2212 " -t print a table (comma-separated-values) rather than histogram", 2213 " -u do not sort the input list", 2214 " -v show progress if output is redirected to a file", 2215 " -V prints the version number", 2216 " -w NUM specify maximum width of the output (default: 80)", 2217 }; 2218 unsigned j; 2219 for (j = 0; j < sizeof(msg) / sizeof(msg[0]); j++) 2220 fprintf(fp, "%s\n", msg[j]); 2221} 2222 2223/* Wrapper around getopt that also parses "--help" and "--version". 2224 * argc, argv, opts, return value, and globals optarg, optind, 2225 * opterr, and optopt are as in getopt(). help and version designate 2226 * what should be returned if --help or --version are encountered. 
*/ 2227static int 2228getopt_helper(int argc, char *const argv[], const char *opts, 2229 int help, int version) 2230{ 2231 if (optind < argc && argv[optind] != NULL) { 2232 if (strcmp(argv[optind], "--help") == 0) { 2233 optind++; 2234 return help; 2235 } else if (strcmp(argv[optind], "--version") == 0) { 2236 optind++; 2237 return version; 2238 } 2239 } 2240 return getopt(argc, argv, opts); 2241} 2242 2243int 2244main(int argc, char *argv[]) 2245{ 2246 int j; 2247 char version[80]; 2248 2249 max_width = 80; 2250 2251 while ((j = getopt_helper(argc, argv, 2252 "bcCdD:e:f:hklmn:N:o:p:qr:RsS:tuvVw:", 'h', 'V')) 2253 != -1) { 2254 switch (j) { 2255 case 'b': 2256 suppress_binary = 1; 2257 break; 2258 case 'c': 2259 comment_opt = "#"; 2260 break; 2261 case 'C': 2262 show_colors = 1; 2263 break; 2264#if OPT_TRACE 2265 case 'd': 2266 trace_opt = 1; 2267 break; 2268#endif 2269 case 'D': 2270 set_path_opt(optarg, 1); 2271 break; 2272 case 'e': 2273 if (freopen(optarg, "w", stderr) == 0) 2274 failed(optarg); 2275 break; 2276 case 'f': 2277 format_opt = atoi(optarg); 2278 break; 2279 case 'h': 2280 usage(stdout); 2281 return (EXIT_SUCCESS); 2282 case 'k': 2283 merge_names = 0; 2284 break; 2285 case 'l': 2286 names_only = 1; 2287 break; 2288 case 'm': 2289 merge_opt = 1; 2290 break; 2291 case 'n': 2292 min_name_wide = atoi(optarg); 2293 break; 2294 case 'N': 2295 max_name_wide = atoi(optarg); 2296 break; 2297 case 'o': 2298 if (freopen(optarg, "w", stdout) == 0) 2299 failed(optarg); 2300 break; 2301 case 'p': 2302 prefix_opt = atoi(optarg); 2303 break; 2304 case 'r': 2305 round_opt = atoi(optarg); 2306 break; 2307 case 'R': 2308 reverse_opt = 1; 2309 break; 2310 case 's': 2311 summary_only = 1; 2312 break; 2313 case 'S': 2314 set_path_opt(optarg, 0); 2315 break; 2316 case 't': 2317 table_opt = 1; 2318 break; 2319 case 'u': 2320 sort_names = 0; 2321 break; 2322 case 'v': 2323 verbose = 1; 2324 break; 2325 case 'V': 2326#ifndef NO_IDENT 2327 if (!sscanf(Id, "%*s %*s %s", 
version)) 2328#endif 2329 (void) strcpy(version, "?"); 2330 printf("diffstat version %s\n", version); 2331 return (EXIT_SUCCESS); 2332 case 'w': 2333 max_width = atoi(optarg); 2334 break; 2335 case 'q': 2336 quiet = 1; 2337 break; 2338 default: 2339 usage(stderr); 2340 return (EXIT_FAILURE); 2341 } 2342 } 2343 2344 /* 2345 * The numbers from -S/-D options will only be useful if the merge option 2346 * is added. 2347 */ 2348 if (path_opt) 2349 merge_opt = 1; 2350 2351 show_progress = verbose && (!isatty(fileno(stdout)) 2352 && isatty(fileno(stderr))); 2353 2354#ifdef HAVE_TSEARCH 2355 use_tsearch = (sort_names && merge_names); 2356#endif 2357 2358 if (optind < argc) { 2359 while (optind < argc) { 2360 FILE *fp; 2361 char *name = argv[optind++]; 2362#ifdef HAVE_POPEN 2363 char *command = is_compressed(name); 2364 if (command != 0) { 2365 if ((fp = popen(command, "r")) != 0) { 2366 if (show_progress) { 2367 (void) fprintf(stderr, "%s\n", name); 2368 (void) fflush(stderr); 2369 } 2370 do_file(fp, name); 2371 (void) pclose(fp); 2372 } 2373 free(command); 2374 } else 2375#endif 2376 if ((fp = fopen(name, "rb")) != 0) { 2377 if (show_progress) { 2378 (void) fprintf(stderr, "%s\n", name); 2379 (void) fflush(stderr); 2380 } 2381 do_file(fp, name); 2382 (void) fclose(fp); 2383 } else { 2384 failed(name); 2385 } 2386 } 2387 } else { 2388#ifdef HAVE_POPEN 2389 FILE *fp; 2390 Decompress which = dcEmpty; 2391 char *stdin_dir = 0; 2392 char *myfile; 2393 char sniff[8]; 2394 int ch; 2395 unsigned got = 0; 2396 char *command; 2397 2398 if ((ch = MY_GETC(stdin)) != EOF) { 2399 which = dcNone; 2400 if (ch == 'B') { /* perhaps bzip2 (poor magic design...) 
*/ 2401 sniff[got++] = (char) ch; 2402 while (got < 5) { 2403 if ((ch = MY_GETC(stdin)) == EOF) 2404 break; 2405 sniff[got++] = (char) ch; 2406 } 2407 if (got == 5 2408 && !strncmp(sniff, "BZh", (size_t) 3) 2409 && isdigit((unsigned char) sniff[3]) 2410 && isdigit((unsigned char) sniff[4])) { 2411 which = dcBzip; 2412 } 2413 } else if (ch == ']') { /* perhaps lzma */ 2414 sniff[got++] = (char) ch; 2415 while (got < 4) { 2416 if ((ch = MY_GETC(stdin)) == EOF) 2417 break; 2418 sniff[got++] = (char) ch; 2419 } 2420 if (got == 4 2421 && !memcmp(sniff, "]\0\0\200", (size_t) 4)) { 2422 which = dcLzma; 2423 } 2424 } else if (ch == 0xfd) { /* perhaps xz */ 2425 sniff[got++] = (char) ch; 2426 while (got < 6) { 2427 if ((ch = MY_GETC(stdin)) == EOF) 2428 break; 2429 sniff[got++] = (char) ch; 2430 } 2431 if (got == 6 2432 && !memcmp(sniff, "\3757zXZ\0", (size_t) 6)) { 2433 which = dcXz; 2434 } 2435 } else if (ch == '\037') { /* perhaps compress, etc. */ 2436 sniff[got++] = (char) ch; 2437 if ((ch = MY_GETC(stdin)) != EOF) { 2438 sniff[got++] = (char) ch; 2439 switch (ch) { 2440 case 0213: 2441 which = dcGzip; 2442 break; 2443 case 0235: 2444 which = dcCompress; 2445 break; 2446 case 0036: 2447 which = dcPack; 2448 break; 2449 } 2450 } 2451 } else { 2452 sniff[got++] = (char) ch; 2453 } 2454 } 2455 /* 2456 * The C standard only guarantees one ungetc; 2457 * virtually everyone allows more. 
2458 */ 2459 while (got != 0) { 2460 ungetc(sniff[--got], stdin); 2461 } 2462 if (which != dcNone 2463 && which != dcEmpty 2464 && (myfile = copy_stdin(&stdin_dir)) != 0) { 2465 2466 /* open pipe to decompress temporary file */ 2467 command = decompressor(which, myfile); 2468 if ((fp = popen(command, "r")) != 0) { 2469 do_file(fp, "stdin"); 2470 (void) pclose(fp); 2471 } 2472 free(command); 2473 2474 unlink(myfile); 2475 free(myfile); 2476 myfile = 0; 2477 rmdir(stdin_dir); 2478 free(stdin_dir); 2479 stdin_dir = 0; 2480 } else if (which != dcEmpty) 2481#endif 2482 do_file(stdin, "stdin"); 2483 } 2484 summarize(); 2485#if defined(NO_LEAKS) 2486 while (all_data != 0) { 2487 delink(all_data); 2488 } 2489#endif 2490 return (EXIT_SUCCESS); 2491} 2492