1/* Support routines for GNU DIFF.
2
3   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4   Free Software Foundation, Inc.
5
6   This file is part of GNU DIFF.
7
8   GNU DIFF is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2, or (at your option)
11   any later version.
12
13   GNU DIFF is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; see the file COPYING.
20   If not, write to the Free Software Foundation,
21   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23#include "diff.h"
24#include <dirname.h>
25#include <error.h>
26#include <quotesys.h>
27#include <regex.h>
28#include <xalloc.h>
29
/* File name of the `pr' program that begin_output runs to paginate
   the output; its value comes from the PR_PROGRAM macro.  */
char const pr_program[] = PR_PROGRAM;
31
/* Queue up one-line messages to be printed at the end,
   when -l is specified.  Each message is recorded with a `struct msg'.

   ARGS is declared with a single element, but message5 allocates each
   node as offsetof (struct msg, args) plus the combined length of the
   strings, so the strings are stored directly after NEXT.  */

struct msg
{
  struct msg *next; /* Following message in the chain, or 0 for the last.  */
  char args[1]; /* Format + 4 args, each '\0' terminated, concatenated.  */
};
40
/* Head of the chain of queued messages; 0 when nothing is queued.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the address of the link that
   the next queued message is to be stored through.  It starts out
   pointing at MSG_CHAIN itself.  */

static struct msg **msg_chain_end = &msg_chain;
48
/* Report a failed system call without exiting.
   The diagnostic is NAME (normally the file name involved) followed by
   the description of the current errno value.  */

void
perror_with_name (char const *name)
{
  int const failure = errno;

  error (0, failure, "%s", name);
}
57
58/* Use when a system call returns non-zero status and that is fatal.  */
59
60void
61pfatal_with_name (char const *name)
62{
63  int e = errno;
64  print_message_queue ();
65  error (EXIT_TROUBLE, e, "%s", name);
66  abort ();
67}
68
69/* Print an error message containing MSGID, then exit.  */
70
71void
72fatal (char const *msgid)
73{
74  print_message_queue ();
75  error (EXIT_TROUBLE, 0, "%s", _(msgid));
76  abort ();
77}
78
/* Two-argument convenience form of message5: behaves like printf with
   FORMAT_MSGID, ARG1 and ARG2, except that the message is saved and
   printed later when output is being paginated.
   This is used for things like "Only in ...".  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const unused = 0;

  message5 (format_msgid, arg1, arg2, unused, unused);
}
87
88void
89message5 (char const *format_msgid, char const *arg1, char const *arg2,
90	  char const *arg3, char const *arg4)
91{
92  if (paginate)
93    {
94      char *p;
95      char const *arg[5];
96      int i;
97      size_t size[5];
98      size_t total_size = offsetof (struct msg, args);
99      struct msg *new;
100
101      arg[0] = format_msgid;
102      arg[1] = arg1;
103      arg[2] = arg2;
104      arg[3] = arg3 ? arg3 : "";
105      arg[4] = arg4 ? arg4 : "";
106
107      for (i = 0;  i < 5;  i++)
108	total_size += size[i] = strlen (arg[i]) + 1;
109
110      new = xmalloc (total_size);
111
112      for (i = 0, p = new->args;  i < 5;  p += size[i++])
113	memcpy (p, arg[i], size[i]);
114
115      *msg_chain_end = new;
116      new->next = 0;
117      msg_chain_end = &new->next;
118    }
119  else
120    {
121      if (sdiff_merge_assist)
122	putchar (' ');
123      printf (_(format_msgid), arg1, arg2, arg3, arg4);
124    }
125}
126
127/* Output all the messages that were saved up by calls to `message'.  */
128
129void
130print_message_queue (void)
131{
132  char const *arg[5];
133  int i;
134  struct msg *m = msg_chain;
135
136  while (m)
137    {
138      struct msg *next = m->next;
139      arg[0] = m->args;
140      for (i = 0;  i < 4;  i++)
141	arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
142      printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
143      free (m);
144      m = next;
145    }
146}
147
148/* Call before outputting the results of comparing files NAME0 and NAME1
149   to set up OUTFILE, the stdio stream for the output to go to.
150
151   Usually, OUTFILE is just stdout.  But when -l was specified
152   we fork off a `pr' and make OUTFILE a pipe to it.
153   `pr' then outputs to our stdout.  */
154
155static char const *current_name0;
156static char const *current_name1;
157static bool currently_recursive;
158
159void
160setup_output (char const *name0, char const *name1, bool recursive)
161{
162  current_name0 = name0;
163  current_name1 = name1;
164  currently_recursive = recursive;
165  outfile = 0;
166}
167
168#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
169static pid_t pr_pid;
170#endif
171
172void
173begin_output (void)
174{
175  char *name;
176
177  if (outfile != 0)
178    return;
179
180  /* Construct the header of this piece of diff.  */
181  name = xmalloc (strlen (current_name0) + strlen (current_name1)
182		  + strlen (switch_string) + 7);
183
184  /* POSIX 1003.1-2001 specifies this format.  But there are some bugs in
185     the standard: it says that we must print only the last component
186     of the pathnames, and it requires two spaces after "diff" if
187     there are no options.  These requirements are silly and do not
188     match historical practice.  */
189  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
190
191  if (paginate)
192    {
193      if (fflush (stdout) != 0)
194	pfatal_with_name (_("write failed"));
195
196      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
197      {
198#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
199	int pipes[2];
200
201	if (pipe (pipes) != 0)
202	  pfatal_with_name ("pipe");
203
204	pr_pid = vfork ();
205	if (pr_pid < 0)
206	  pfatal_with_name ("fork");
207
208	if (pr_pid == 0)
209	  {
210	    close (pipes[1]);
211	    if (pipes[0] != STDIN_FILENO)
212	      {
213		if (dup2 (pipes[0], STDIN_FILENO) < 0)
214		  pfatal_with_name ("dup2");
215		close (pipes[0]);
216	      }
217
218	    execl (pr_program, pr_program, "-h", name, 0);
219	    _exit (errno == ENOEXEC ? 126 : 127);
220	  }
221	else
222	  {
223	    close (pipes[0]);
224	    outfile = fdopen (pipes[1], "w");
225	    if (!outfile)
226	      pfatal_with_name ("fdopen");
227	  }
228#else
229	char *command = xmalloc (sizeof pr_program - 1 + 7
230				 + quote_system_arg ((char *) 0, name) + 1);
231	char *p;
232	sprintf (command, "%s -f -h ", pr_program);
233	p = command + sizeof pr_program - 1 + 7;
234	p += quote_system_arg (p, name);
235	*p = 0;
236	errno = 0;
237	outfile = popen (command, "w");
238	if (!outfile)
239	  pfatal_with_name (command);
240	free (command);
241#endif
242      }
243    }
244  else
245    {
246
247      /* If -l was not specified, output the diff straight to `stdout'.  */
248
249      outfile = stdout;
250
251      /* If handling multiple files (because scanning a directory),
252	 print which files the following output is about.  */
253      if (currently_recursive)
254	printf ("%s\n", name);
255    }
256
257  free (name);
258
259  /* A special header is needed at the beginning of context output.  */
260  switch (output_style)
261    {
262    case OUTPUT_CONTEXT:
263      print_context_header (files, 0);
264      break;
265
266    case OUTPUT_UNIFIED:
267      print_context_header (files, 1);
268      break;
269
270    default:
271      break;
272    }
273}
274
275/* Call after the end of output of diffs for one file.
276   Close OUTFILE and get rid of the `pr' subfork.  */
277
278void
279finish_output (void)
280{
281  if (outfile != 0 && outfile != stdout)
282    {
283      int wstatus;
284      int werrno = 0;
285      if (ferror (outfile))
286	fatal ("write failed");
287#if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
288      wstatus = pclose (outfile);
289      if (wstatus == -1)
290	werrno = errno;
291#else
292      if (fclose (outfile) != 0)
293	pfatal_with_name (_("write failed"));
294      if (waitpid (pr_pid, &wstatus, 0) < 0)
295	pfatal_with_name ("waitpid");
296#endif
297      if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
298	error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
299	       pr_program);
300      if (wstatus != 0)
301	error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
302	       pr_program);
303    }
304
305  outfile = 0;
306}
307
/* Compare two lines (typically one from each input file)
   according to the command line options.
   For efficiency, this is invoked only when the lines do not match exactly
   but an option like -i might cause us to ignore the difference.
   S1 and S2 point at the first character of each line.  The scan has
   no length bound of its own: it stops only at a newline or at the
   first difference that the options do not excuse.
   Return nonzero if the lines differ.  */

bool
lines_differ (char const *s1, char const *s2)
{
  register unsigned char const *t1 = (unsigned char const *) s1;
  register unsigned char const *t2 = (unsigned char const *) s2;
  /* Column reached so far, advanced from the characters of line 1;
     only the IGNORE_TAB_EXPANSION case consults it.  */
  size_t column = 0;

  while (1)
    {
      register unsigned char c1 = *t1++;
      register unsigned char c2 = *t2++;

      /* Test for exact char equality first, since it's a common case.  */
      if (c1 != c2)
	{
	  switch (ignore_white_space)
	    {
	    case IGNORE_ALL_SPACE:
	      /* For -w, just skip past any white space.  */
	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
	      break;

	    case IGNORE_SPACE_CHANGE:
	      /* For -b, advance past any sequence of white space in
		 line 1 and consider it just one space, or nothing at
		 all if it is at the end of the line.  */
	      if (ISSPACE (c1))
		{
		  while (c1 != '\n')
		    {
		      c1 = *t1++;
		      if (! ISSPACE (c1))
			{
			  /* Leave T1 on the first non-space character.  */
			  --t1;
			  c1 = ' ';
			  break;
			}
		    }
		}

	      /* Likewise for line 2.  */
	      if (ISSPACE (c2))
		{
		  while (c2 != '\n')
		    {
		      c2 = *t2++;
		      if (! ISSPACE (c2))
			{
			  --t2;
			  c2 = ' ';
			  break;
			}
		    }
		}

	      if (c1 != c2)
		{
		  /* If we went too far when doing the simple test
		     for equality, go back to the first non-white-space
		     character in both sides and try again.  */
		  if (c2 == ' ' && c1 != '\n'
		      && (unsigned char const *) s1 + 1 < t1
		      && ISSPACE (t1[-2]))
		    {
		      --t1;
		      continue;
		    }
		  if (c1 == ' ' && c2 != '\n'
		      && (unsigned char const *) s2 + 1 < t2
		      && ISSPACE (t2[-2]))
		    {
		      --t2;
		      continue;
		    }
		}

	      break;

	    case IGNORE_TAB_EXPANSION:
	      /* A space on one side met a tab on the other.  Walk each
		 run of spaces and tabs, tracking the column it ends at;
		 the lines differ unless both runs end at the same
		 column.  Afterwards C1 and C2 hold the first character
		 following each run, and those are compared below.  */
	      if ((c1 == ' ' && c2 == '\t')
		  || (c1 == '\t' && c2 == ' '))
		{
		  size_t column2 = column;
		  for (;; c1 = *t1++)
		    {
		      if (c1 == ' ')
			column++;
		      else if (c1 == '\t')
			column += TAB_WIDTH - column % TAB_WIDTH;
		      else
			break;
		    }
		  for (;; c2 = *t2++)
		    {
		      if (c2 == ' ')
			column2++;
		      else if (c2 == '\t')
			column2 += TAB_WIDTH - column2 % TAB_WIDTH;
		      else
			break;
		    }
		  if (column != column2)
		    return 1;
		}
	      break;

	    case IGNORE_NO_WHITE_SPACE:
	      break;
	    }

	  /* Lowercase all letters if -i is specified.  */

	  if (ignore_case)
	    {
	      c1 = TOLOWER (c1);
	      c2 = TOLOWER (c2);
	    }

	  /* Still different after applying the options: leave the loop
	     and report a difference.  */
	  if (c1 != c2)
	    break;
	}
      /* C1 equals C2 here, so a newline means both lines have ended.  */
      if (c1 == '\n')
	return 0;

      column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
    }

  return 1;
}
444
/* Find the consecutive changes at the start of the script START.
   Return the last link before the first gap.

   This implementation hands back START itself, i.e. every change is
   treated as a group of its own.  */

struct change *
find_change (struct change *start)
{
  struct change *last_in_group = start;

  return last_in_group;
}
453
/* Counterpart of find_change with the same signature; it likewise
   returns START itself.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *last_in_group = start;

  return last_in_group;
}
459
460/* Divide SCRIPT into pieces by calling HUNKFUN and
461   print each piece with PRINTFUN.
462   Both functions take one arg, an edit script.
463
464   HUNKFUN is called with the tail of the script
465   and returns the last link that belongs together with the start
466   of the tail.
467
468   PRINTFUN takes a subscript which belongs together (with a null
469   link at the end) and prints it.  */
470
471void
472print_script (struct change *script,
473	      struct change * (*hunkfun) (struct change *),
474	      void (*printfun) (struct change *))
475{
476  struct change *next = script;
477
478  while (next)
479    {
480      struct change *this, *end;
481
482      /* Find a set of changes that belong together.  */
483      this = next;
484      end = (*hunkfun) (next);
485
486      /* Disconnect them from the rest of the changes,
487	 making them a hunk, and remember the rest for next iteration.  */
488      next = end->link;
489      end->link = 0;
490#ifdef DEBUG
491      debug_script (this);
492#endif
493
494      /* Print this hunk.  */
495      (*printfun) (this);
496
497      /* Reconnect the script so it will all be freed properly.  */
498      end->link = next;
499    }
500}
501
502/* Print the text of a single line LINE,
503   flagging it with the characters in LINE_FLAG (which say whether
504   the line is inserted, deleted, changed, etc.).  */
505
506void
507print_1_line (char const *line_flag, char const *const *line)
508{
509  char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
510  FILE *out = outfile; /* Help the compiler some more.  */
511  char const *flag_format = 0;
512
513  /* If -T was specified, use a Tab between the line-flag and the text.
514     Otherwise use a Space (as Unix diff does).
515     Print neither space nor tab if line-flags are empty.  */
516
517  if (line_flag && *line_flag)
518    {
519      flag_format = initial_tab ? "%s\t" : "%s ";
520      fprintf (out, flag_format, line_flag);
521    }
522
523  output_1_line (base, limit, flag_format, line_flag);
524
525  if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
526    fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
527}
528
529/* Output a line from BASE up to LIMIT.
530   With -t, expand white space characters to spaces, and if FLAG_FORMAT
531   is nonzero, output it with argument LINE_FLAG after every
532   internal carriage return, so that tab stops continue to line up.  */
533
534void
535output_1_line (char const *base, char const *limit, char const *flag_format,
536	       char const *line_flag)
537{
538  if (!expand_tabs)
539    fwrite (base, limit - base, 1, outfile);
540  else
541    {
542      register FILE *out = outfile;
543      register unsigned char c;
544      register char const *t = base;
545      register unsigned int column = 0;
546
547      while (t < limit)
548	switch ((c = *t++))
549	  {
550	  case '\t':
551	    {
552	      unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
553	      column += spaces;
554	      do
555		putc (' ', out);
556	      while (--spaces);
557	    }
558	    break;
559
560	  case '\r':
561	    putc (c, out);
562	    if (flag_format && t < limit && *t != '\n')
563	      fprintf (out, flag_format, line_flag);
564	    column = 0;
565	    break;
566
567	  case '\b':
568	    if (column == 0)
569	      continue;
570	    column--;
571	    putc (c, out);
572	    break;
573
574	  default:
575	    if (ISPRINT (c))
576	      column++;
577	    putc (c, out);
578	    break;
579	  }
580    }
581}
582
/* Letters 'd', 'a' and 'c' in slots 1 to 3; slot 0 holds no letter.
   NOTE(review): presumably indexed by `enum changes' (OLD, NEW,
   CHANGED) -- confirm against the enum's declaration.  */
char const change_letter[] = { 0, 'd', 'a', 'c' };
584
585/* Translate an internal line number (an index into diff's table of lines)
586   into an actual line number in the input file.
587   The internal line number is I.  FILE points to the data on the file.
588
589   Internal line numbers count from 0 starting after the prefix.
590   Actual line numbers count from 1 within the entire file.  */
591
592lin
593translate_line_number (struct file_data const *file, lin i)
594{
595  return i + file->prefix_lines + 1;
596}
597
598/* Translate a line number range.  This is always done for printing,
599   so for convenience translate to long rather than lin, so that the
600   caller can use printf with "%ld" without casting.  */
601
602void
603translate_range (struct file_data const *file,
604		 lin a, lin b,
605		 long *aptr, long *bptr)
606{
607  *aptr = translate_line_number (file, a - 1) + 1;
608  *bptr = translate_line_number (file, b + 1) - 1;
609}
610
611/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
612   If the two numbers are identical, print just one number.
613
614   Args A and B are internal line numbers.
615   We print the translated (real) line numbers.  */
616
617void
618print_number_range (char sepchar, struct file_data *file, lin a, lin b)
619{
620  long trans_a, trans_b;
621  translate_range (file, a, b, &trans_a, &trans_b);
622
623  /* Note: we can have B < A in the case of a range of no lines.
624     In this case, we should print the line number before the range,
625     which is B.  */
626  if (trans_b > trans_a)
627    fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
628  else
629    fprintf (outfile, "%ld", trans_b);
630}
631
632/* Look at a hunk of edit script and report the range of lines in each file
633   that it applies to.  HUNK is the start of the hunk, which is a chain
634   of `struct change'.  The first and last line numbers of file 0 are stored in
635   *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
636   Note that these are internal line numbers that count from 0.
637
638   If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
639
640   Return UNCHANGED if only ignorable lines are inserted or deleted,
641   OLD if lines of file 0 are deleted,
642   NEW if lines of file 1 are inserted,
643   and CHANGED if both kinds of changes are found. */
644
645enum changes
646analyze_hunk (struct change *hunk,
647	      lin *first0, lin *last0,
648	      lin *first1, lin *last1)
649{
650  struct change *next;
651  lin l0, l1;
652  lin show_from, show_to;
653  lin i;
654  bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
655  size_t trivial_length = (int) ignore_blank_lines - 1;
656    /* If 0, ignore zero-length lines;
657       if SIZE_MAX, do not ignore lines just because of their length.  */
658
659  char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
660  char const * const *linbuf1 = files[1].linbuf;
661
662  show_from = show_to = 0;
663
664  *first0 = hunk->line0;
665  *first1 = hunk->line1;
666
667  next = hunk;
668  do
669    {
670      l0 = next->line0 + next->deleted - 1;
671      l1 = next->line1 + next->inserted - 1;
672      show_from += next->deleted;
673      show_to += next->inserted;
674
675      for (i = next->line0; i <= l0 && trivial; i++)
676	{
677	  char const *line = linbuf0[i];
678	  size_t len = linbuf0[i + 1] - line - 1;
679	  if (len != trivial_length
680	      && (! ignore_regexp.fastmap
681		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
682	    trivial = 0;
683	}
684
685      for (i = next->line1; i <= l1 && trivial; i++)
686	{
687	  char const *line = linbuf1[i];
688	  size_t len = linbuf1[i + 1] - line - 1;
689	  if (len != trivial_length
690	      && (! ignore_regexp.fastmap
691		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
692	    trivial = 0;
693	}
694    }
695  while ((next = next->link) != 0);
696
697  *last0 = l0;
698  *last1 = l1;
699
700  /* If all inserted or deleted lines are ignorable,
701     tell the caller to ignore this hunk.  */
702
703  if (trivial)
704    return UNCHANGED;
705
706  return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
707}
708
/* Return a newly allocated string holding S1, S2 and S3 joined in
   that order.  The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *joined = xmalloc (len1 + len2 + len3 + 1);
  char *tail = joined;

  memcpy (tail, s1, len1);
  tail += len1;
  memcpy (tail, s2, len2);
  tail += len2;
  /* Copy the last piece together with its terminating '\0'.  */
  memcpy (tail, s3, len3 + 1);

  return joined;
}
718
/* Allocate SIZE bytes with xmalloc, clear them to zero, and return
   the block.  */

void *
zalloc (size_t size)
{
  /* memset returns its first argument, i.e. the new block.  */
  return memset (xmalloc (size), 0, size);
}
728
/* Return the newly allocated pathname of the file named FILE inside
   directory DIR.  A "/" is placed between the two unless the last
   component of DIR (as given by base_name) is empty or already ends
   in a slash.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last_component = base_name (dir);
  size_t last_len = strlen (last_component);
  char const *separator = "/";

  if (last_len == 0 || last_component[last_len - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
739
740void
741debug_script (struct change *sp)
742{
743  fflush (stdout);
744
745  for (; sp; sp = sp->link)
746    {
747      long line0 = sp->line0;
748      long line1 = sp->line1;
749      long deleted = sp->deleted;
750      long inserted = sp->inserted;
751      fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
752	       line0, line1, deleted, inserted);
753    }
754
755  fflush (stderr);
756}
757