1/* Support routines for GNU DIFF. 2 3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 4 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 GNU DIFF is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 GNU DIFF is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; see the file COPYING. 20 If not, write to the Free Software Foundation, 21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 23#include "diff.h" 24#include <dirname.h> 25#include <error.h> 26#include <quotesys.h> 27#include <regex.h> 28#include <xalloc.h> 29 30char const pr_program[] = PR_PROGRAM; 31 32/* Queue up one-line messages to be printed at the end, 33 when -l is specified. Each message is recorded with a `struct msg'. */ 34 35struct msg 36{ 37 struct msg *next; 38 char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */ 39}; 40 41/* Head of the chain of queues messages. */ 42 43static struct msg *msg_chain; 44 45/* Tail of the chain of queues messages. */ 46 47static struct msg **msg_chain_end = &msg_chain; 48 49/* Use when a system call returns non-zero status. 50 NAME should normally be the file name. */ 51 52void 53perror_with_name (char const *name) 54{ 55 error (0, errno, "%s", name); 56} 57 58/* Use when a system call returns non-zero status and that is fatal. */ 59 60void 61pfatal_with_name (char const *name) 62{ 63 int e = errno; 64 print_message_queue (); 65 error (EXIT_TROUBLE, e, "%s", name); 66 abort (); 67} 68 69/* Print an error message containing MSGID, then exit. */ 70 71void 72fatal (char const *msgid) 73{ 74 print_message_queue (); 75 error (EXIT_TROUBLE, 0, "%s", _(msgid)); 76 abort (); 77} 78 79/* Like printf, except if -l in effect then save the message and print later. 80 This is used for things like "Only in ...". */ 81 82void 83message (char const *format_msgid, char const *arg1, char const *arg2) 84{ 85 message5 (format_msgid, arg1, arg2, 0, 0); 86} 87 88void 89message5 (char const *format_msgid, char const *arg1, char const *arg2, 90 char const *arg3, char const *arg4) 91{ 92 if (paginate) 93 { 94 char *p; 95 char const *arg[5]; 96 int i; 97 size_t size[5]; 98 size_t total_size = offsetof (struct msg, args); 99 struct msg *new; 100 101 arg[0] = format_msgid; 102 arg[1] = arg1; 103 arg[2] = arg2; 104 arg[3] = arg3 ? arg3 : ""; 105 arg[4] = arg4 ? arg4 : ""; 106 107 for (i = 0; i < 5; i++) 108 total_size += size[i] = strlen (arg[i]) + 1; 109 110 new = xmalloc (total_size); 111 112 for (i = 0, p = new->args; i < 5; p += size[i++]) 113 memcpy (p, arg[i], size[i]); 114 115 *msg_chain_end = new; 116 new->next = 0; 117 msg_chain_end = &new->next; 118 } 119 else 120 { 121 if (sdiff_merge_assist) 122 putchar (' '); 123 printf (_(format_msgid), arg1, arg2, arg3, arg4); 124 } 125} 126 127/* Output all the messages that were saved up by calls to `message'. */ 128 129void 130print_message_queue (void) 131{ 132 char const *arg[5]; 133 int i; 134 struct msg *m = msg_chain; 135 136 while (m) 137 { 138 struct msg *next = m->next; 139 arg[0] = m->args; 140 for (i = 0; i < 4; i++) 141 arg[i + 1] = arg[i] + strlen (arg[i]) + 1; 142 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]); 143 free (m); 144 m = next; 145 } 146} 147 148/* Call before outputting the results of comparing files NAME0 and NAME1 149 to set up OUTFILE, the stdio stream for the output to go to. 150 151 Usually, OUTFILE is just stdout. But when -l was specified 152 we fork off a `pr' and make OUTFILE a pipe to it. 153 `pr' then outputs to our stdout. */ 154 155static char const *current_name0; 156static char const *current_name1; 157static bool currently_recursive; 158 159void 160setup_output (char const *name0, char const *name1, bool recursive) 161{ 162 current_name0 = name0; 163 current_name1 = name1; 164 currently_recursive = recursive; 165 outfile = 0; 166} 167 168#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 169static pid_t pr_pid; 170#endif 171 172void 173begin_output (void) 174{ 175 char *name; 176 177 if (outfile != 0) 178 return; 179 180 /* Construct the header of this piece of diff. */ 181 name = xmalloc (strlen (current_name0) + strlen (current_name1) 182 + strlen (switch_string) + 7); 183 184 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in 185 the standard: it says that we must print only the last component 186 of the pathnames, and it requires two spaces after "diff" if 187 there are no options. These requirements are silly and do not 188 match historical practice. */ 189 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1); 190 191 if (paginate) 192 { 193 if (fflush (stdout) != 0) 194 pfatal_with_name (_("write failed")); 195 196 /* Make OUTFILE a pipe to a subsidiary `pr'. */ 197 { 198#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 199 int pipes[2]; 200 201 if (pipe (pipes) != 0) 202 pfatal_with_name ("pipe"); 203 204 pr_pid = vfork (); 205 if (pr_pid < 0) 206 pfatal_with_name ("fork"); 207 208 if (pr_pid == 0) 209 { 210 close (pipes[1]); 211 if (pipes[0] != STDIN_FILENO) 212 { 213 if (dup2 (pipes[0], STDIN_FILENO) < 0) 214 pfatal_with_name ("dup2"); 215 close (pipes[0]); 216 } 217 218 execl (pr_program, pr_program, "-h", name, 0); 219 _exit (errno == ENOEXEC ? 126 : 127); 220 } 221 else 222 { 223 close (pipes[0]); 224 outfile = fdopen (pipes[1], "w"); 225 if (!outfile) 226 pfatal_with_name ("fdopen"); 227 } 228#else 229 char *command = xmalloc (sizeof pr_program - 1 + 7 230 + quote_system_arg ((char *) 0, name) + 1); 231 char *p; 232 sprintf (command, "%s -f -h ", pr_program); 233 p = command + sizeof pr_program - 1 + 7; 234 p += quote_system_arg (p, name); 235 *p = 0; 236 errno = 0; 237 outfile = popen (command, "w"); 238 if (!outfile) 239 pfatal_with_name (command); 240 free (command); 241#endif 242 } 243 } 244 else 245 { 246 247 /* If -l was not specified, output the diff straight to `stdout'. */ 248 249 outfile = stdout; 250 251 /* If handling multiple files (because scanning a directory), 252 print which files the following output is about. */ 253 if (currently_recursive) 254 printf ("%s\n", name); 255 } 256 257 free (name); 258 259 /* A special header is needed at the beginning of context output. */ 260 switch (output_style) 261 { 262 case OUTPUT_CONTEXT: 263 print_context_header (files, 0); 264 break; 265 266 case OUTPUT_UNIFIED: 267 print_context_header (files, 1); 268 break; 269 270 default: 271 break; 272 } 273} 274 275/* Call after the end of output of diffs for one file. 276 Close OUTFILE and get rid of the `pr' subfork. */ 277 278void 279finish_output (void) 280{ 281 if (outfile != 0 && outfile != stdout) 282 { 283 int wstatus; 284 int werrno = 0; 285 if (ferror (outfile)) 286 fatal ("write failed"); 287#if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK) 288 wstatus = pclose (outfile); 289 if (wstatus == -1) 290 werrno = errno; 291#else 292 if (fclose (outfile) != 0) 293 pfatal_with_name (_("write failed")); 294 if (waitpid (pr_pid, &wstatus, 0) < 0) 295 pfatal_with_name ("waitpid"); 296#endif 297 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127) 298 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"), 299 pr_program); 300 if (wstatus != 0) 301 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"), 302 pr_program); 303 } 304 305 outfile = 0; 306} 307 308/* Compare two lines (typically one from each input file) 309 according to the command line options. 310 For efficiency, this is invoked only when the lines do not match exactly 311 but an option like -i might cause us to ignore the difference. 312 Return nonzero if the lines differ. */ 313 314bool 315lines_differ (char const *s1, char const *s2) 316{ 317 register unsigned char const *t1 = (unsigned char const *) s1; 318 register unsigned char const *t2 = (unsigned char const *) s2; 319 size_t column = 0; 320 321 while (1) 322 { 323 register unsigned char c1 = *t1++; 324 register unsigned char c2 = *t2++; 325 326 /* Test for exact char equality first, since it's a common case. */ 327 if (c1 != c2) 328 { 329 switch (ignore_white_space) 330 { 331 case IGNORE_ALL_SPACE: 332 /* For -w, just skip past any white space. */ 333 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++; 334 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++; 335 break; 336 337 case IGNORE_SPACE_CHANGE: 338 /* For -b, advance past any sequence of white space in 339 line 1 and consider it just one space, or nothing at 340 all if it is at the end of the line. */ 341 if (ISSPACE (c1)) 342 { 343 while (c1 != '\n') 344 { 345 c1 = *t1++; 346 if (! ISSPACE (c1)) 347 { 348 --t1; 349 c1 = ' '; 350 break; 351 } 352 } 353 } 354 355 /* Likewise for line 2. */ 356 if (ISSPACE (c2)) 357 { 358 while (c2 != '\n') 359 { 360 c2 = *t2++; 361 if (! ISSPACE (c2)) 362 { 363 --t2; 364 c2 = ' '; 365 break; 366 } 367 } 368 } 369 370 if (c1 != c2) 371 { 372 /* If we went too far when doing the simple test 373 for equality, go back to the first non-white-space 374 character in both sides and try again. */ 375 if (c2 == ' ' && c1 != '\n' 376 && (unsigned char const *) s1 + 1 < t1 377 && ISSPACE (t1[-2])) 378 { 379 --t1; 380 continue; 381 } 382 if (c1 == ' ' && c2 != '\n' 383 && (unsigned char const *) s2 + 1 < t2 384 && ISSPACE (t2[-2])) 385 { 386 --t2; 387 continue; 388 } 389 } 390 391 break; 392 393 case IGNORE_TAB_EXPANSION: 394 if ((c1 == ' ' && c2 == '\t') 395 || (c1 == '\t' && c2 == ' ')) 396 { 397 size_t column2 = column; 398 for (;; c1 = *t1++) 399 { 400 if (c1 == ' ') 401 column++; 402 else if (c1 == '\t') 403 column += TAB_WIDTH - column % TAB_WIDTH; 404 else 405 break; 406 } 407 for (;; c2 = *t2++) 408 { 409 if (c2 == ' ') 410 column2++; 411 else if (c2 == '\t') 412 column2 += TAB_WIDTH - column2 % TAB_WIDTH; 413 else 414 break; 415 } 416 if (column != column2) 417 return 1; 418 } 419 break; 420 421 case IGNORE_NO_WHITE_SPACE: 422 break; 423 } 424 425 /* Lowercase all letters if -i is specified. */ 426 427 if (ignore_case) 428 { 429 c1 = TOLOWER (c1); 430 c2 = TOLOWER (c2); 431 } 432 433 if (c1 != c2) 434 break; 435 } 436 if (c1 == '\n') 437 return 0; 438 439 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1; 440 } 441 442 return 1; 443} 444 445/* Find the consecutive changes at the start of the script START. 446 Return the last link before the first gap. */ 447 448struct change * 449find_change (struct change *start) 450{ 451 return start; 452} 453 454struct change * 455find_reverse_change (struct change *start) 456{ 457 return start; 458} 459 460/* Divide SCRIPT into pieces by calling HUNKFUN and 461 print each piece with PRINTFUN. 462 Both functions take one arg, an edit script. 463 464 HUNKFUN is called with the tail of the script 465 and returns the last link that belongs together with the start 466 of the tail. 467 468 PRINTFUN takes a subscript which belongs together (with a null 469 link at the end) and prints it. */ 470 471void 472print_script (struct change *script, 473 struct change * (*hunkfun) (struct change *), 474 void (*printfun) (struct change *)) 475{ 476 struct change *next = script; 477 478 while (next) 479 { 480 struct change *this, *end; 481 482 /* Find a set of changes that belong together. */ 483 this = next; 484 end = (*hunkfun) (next); 485 486 /* Disconnect them from the rest of the changes, 487 making them a hunk, and remember the rest for next iteration. */ 488 next = end->link; 489 end->link = 0; 490#ifdef DEBUG 491 debug_script (this); 492#endif 493 494 /* Print this hunk. */ 495 (*printfun) (this); 496 497 /* Reconnect the script so it will all be freed properly. */ 498 end->link = next; 499 } 500} 501 502/* Print the text of a single line LINE, 503 flagging it with the characters in LINE_FLAG (which say whether 504 the line is inserted, deleted, changed, etc.). */ 505 506void 507print_1_line (char const *line_flag, char const *const *line) 508{ 509 char const *base = line[0], *limit = line[1]; /* Help the compiler. */ 510 FILE *out = outfile; /* Help the compiler some more. */ 511 char const *flag_format = 0; 512 513 /* If -T was specified, use a Tab between the line-flag and the text. 514 Otherwise use a Space (as Unix diff does). 515 Print neither space nor tab if line-flags are empty. */ 516 517 if (line_flag && *line_flag) 518 { 519 flag_format = initial_tab ? "%s\t" : "%s "; 520 fprintf (out, flag_format, line_flag); 521 } 522 523 output_1_line (base, limit, flag_format, line_flag); 524 525 if ((!line_flag || line_flag[0]) && limit[-1] != '\n') 526 fprintf (out, "\n\\ %s\n", _("No newline at end of file")); 527} 528 529/* Output a line from BASE up to LIMIT. 530 With -t, expand white space characters to spaces, and if FLAG_FORMAT 531 is nonzero, output it with argument LINE_FLAG after every 532 internal carriage return, so that tab stops continue to line up. */ 533 534void 535output_1_line (char const *base, char const *limit, char const *flag_format, 536 char const *line_flag) 537{ 538 if (!expand_tabs) 539 fwrite (base, limit - base, 1, outfile); 540 else 541 { 542 register FILE *out = outfile; 543 register unsigned char c; 544 register char const *t = base; 545 register unsigned int column = 0; 546 547 while (t < limit) 548 switch ((c = *t++)) 549 { 550 case '\t': 551 { 552 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH; 553 column += spaces; 554 do 555 putc (' ', out); 556 while (--spaces); 557 } 558 break; 559 560 case '\r': 561 putc (c, out); 562 if (flag_format && t < limit && *t != '\n') 563 fprintf (out, flag_format, line_flag); 564 column = 0; 565 break; 566 567 case '\b': 568 if (column == 0) 569 continue; 570 column--; 571 putc (c, out); 572 break; 573 574 default: 575 if (ISPRINT (c)) 576 column++; 577 putc (c, out); 578 break; 579 } 580 } 581} 582 583char const change_letter[] = { 0, 'd', 'a', 'c' }; 584 585/* Translate an internal line number (an index into diff's table of lines) 586 into an actual line number in the input file. 587 The internal line number is I. FILE points to the data on the file. 588 589 Internal line numbers count from 0 starting after the prefix. 590 Actual line numbers count from 1 within the entire file. */ 591 592lin 593translate_line_number (struct file_data const *file, lin i) 594{ 595 return i + file->prefix_lines + 1; 596} 597 598/* Translate a line number range. This is always done for printing, 599 so for convenience translate to long rather than lin, so that the 600 caller can use printf with "%ld" without casting. */ 601 602void 603translate_range (struct file_data const *file, 604 lin a, lin b, 605 long *aptr, long *bptr) 606{ 607 *aptr = translate_line_number (file, a - 1) + 1; 608 *bptr = translate_line_number (file, b + 1) - 1; 609} 610 611/* Print a pair of line numbers with SEPCHAR, translated for file FILE. 612 If the two numbers are identical, print just one number. 613 614 Args A and B are internal line numbers. 615 We print the translated (real) line numbers. */ 616 617void 618print_number_range (char sepchar, struct file_data *file, lin a, lin b) 619{ 620 long trans_a, trans_b; 621 translate_range (file, a, b, &trans_a, &trans_b); 622 623 /* Note: we can have B < A in the case of a range of no lines. 624 In this case, we should print the line number before the range, 625 which is B. */ 626 if (trans_b > trans_a) 627 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b); 628 else 629 fprintf (outfile, "%ld", trans_b); 630} 631 632/* Look at a hunk of edit script and report the range of lines in each file 633 that it applies to. HUNK is the start of the hunk, which is a chain 634 of `struct change'. The first and last line numbers of file 0 are stored in 635 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. 636 Note that these are internal line numbers that count from 0. 637 638 If no lines from file 0 are deleted, then FIRST0 is LAST0+1. 639 640 Return UNCHANGED if only ignorable lines are inserted or deleted, 641 OLD if lines of file 0 are deleted, 642 NEW if lines of file 1 are inserted, 643 and CHANGED if both kinds of changes are found. */ 644 645enum changes 646analyze_hunk (struct change *hunk, 647 lin *first0, lin *last0, 648 lin *first1, lin *last1) 649{ 650 struct change *next; 651 lin l0, l1; 652 lin show_from, show_to; 653 lin i; 654 bool trivial = ignore_blank_lines || ignore_regexp.fastmap; 655 size_t trivial_length = (int) ignore_blank_lines - 1; 656 /* If 0, ignore zero-length lines; 657 if SIZE_MAX, do not ignore lines just because of their length. */ 658 659 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */ 660 char const * const *linbuf1 = files[1].linbuf; 661 662 show_from = show_to = 0; 663 664 *first0 = hunk->line0; 665 *first1 = hunk->line1; 666 667 next = hunk; 668 do 669 { 670 l0 = next->line0 + next->deleted - 1; 671 l1 = next->line1 + next->inserted - 1; 672 show_from += next->deleted; 673 show_to += next->inserted; 674 675 for (i = next->line0; i <= l0 && trivial; i++) 676 { 677 char const *line = linbuf0[i]; 678 size_t len = linbuf0[i + 1] - line - 1; 679 if (len != trivial_length 680 && (! ignore_regexp.fastmap 681 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 682 trivial = 0; 683 } 684 685 for (i = next->line1; i <= l1 && trivial; i++) 686 { 687 char const *line = linbuf1[i]; 688 size_t len = linbuf1[i + 1] - line - 1; 689 if (len != trivial_length 690 && (! ignore_regexp.fastmap 691 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 692 trivial = 0; 693 } 694 } 695 while ((next = next->link) != 0); 696 697 *last0 = l0; 698 *last1 = l1; 699 700 /* If all inserted or deleted lines are ignorable, 701 tell the caller to ignore this hunk. */ 702 703 if (trivial) 704 return UNCHANGED; 705 706 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED); 707} 708 709/* Concatenate three strings, returning a newly malloc'd string. */ 710 711char * 712concat (char const *s1, char const *s2, char const *s3) 713{ 714 char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1); 715 sprintf (new, "%s%s%s", s1, s2, s3); 716 return new; 717} 718 719/* Yield a new block of SIZE bytes, initialized to zero. */ 720 721void * 722zalloc (size_t size) 723{ 724 void *p = xmalloc (size); 725 memset (p, 0, size); 726 return p; 727} 728 729/* Yield the newly malloc'd pathname 730 of the file in DIR whose filename is FILE. */ 731 732char * 733dir_file_pathname (char const *dir, char const *file) 734{ 735 char const *base = base_name (dir); 736 bool omit_slash = !*base || base[strlen (base) - 1] == '/'; 737 return concat (dir, "/" + omit_slash, file); 738} 739 740void 741debug_script (struct change *sp) 742{ 743 fflush (stdout); 744 745 for (; sp; sp = sp->link) 746 { 747 long line0 = sp->line0; 748 long line1 = sp->line1; 749 long deleted = sp->deleted; 750 long inserted = sp->inserted; 751 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n", 752 line0, line1, deleted, inserted); 753 } 754 755 fflush (stderr); 756} 757