1/* Support routines for GNU DIFF.
2   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3
4This file is part of GNU DIFF.
5
6GNU DIFF is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU DIFF is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16*/
17
18#include "diff.h"
19
20#if __STDC__
21#include <stdarg.h>
22#else
23#include <varargs.h>
24#endif
25
26#ifndef strerror
27extern char *strerror ();
28#endif
29
/* One-line messages are queued up, to be printed at the end, when -l
   is specified.  Each queued message is recorded in a `struct msg':
   a format string plus up to four string arguments for it.  */

struct msg
{
  struct msg *next;	/* Next message in the queue, or 0 at the tail.  */
  const char *format;	/* Format string later given to printf_output.  */
  const char *arg1;	/* First argument for FORMAT.  */
  const char *arg2;	/* Second argument for FORMAT.  */
  const char *arg3;	/* Third argument for FORMAT; may be 0.  */
  const char *arg4;	/* Fourth argument for FORMAT; may be 0.  */
};
42
/* Head of the chain of queued messages; 0 while nothing is queued.  */

static struct msg *msg_chain = 0;

/* Tail of the chain of queued messages: the address of the link
   through which the next queued message is stored.  It starts out
   pointing at the head itself.  */

static struct msg **msg_chain_end = &msg_chain;
50
51/* Use when a system call returns non-zero status.
52   TEXT should normally be the file name.  */
53
54void
55perror_with_name (text)
56     char const *text;
57{
58  int e = errno;
59
60  if (callbacks && callbacks->error)
61    (*callbacks->error) ("%s: %s", text, strerror (e));
62  else
63    {
64      fprintf (stderr, "%s: ", diff_program_name);
65      errno = e;
66      perror (text);
67    }
68}
69
70/* Use when a system call returns non-zero status and that is fatal.  */
71
72void
73pfatal_with_name (text)
74     char const *text;
75{
76  int e = errno;
77  print_message_queue ();
78  if (callbacks && callbacks->error)
79    (*callbacks->error) ("%s: %s", text, strerror (e));
80  else
81    {
82      fprintf (stderr, "%s: ", diff_program_name);
83      errno = e;
84      perror (text);
85    }
86  DIFF_ABORT (2);
87}
88
89/* Print an error message from the format-string FORMAT
90   with args ARG1 and ARG2.  */
91
92void
93diff_error (format, arg, arg1)
94     char const *format, *arg, *arg1;
95{
96  if (callbacks && callbacks->error)
97    (*callbacks->error) (format, arg, arg1);
98  else
99    {
100      fprintf (stderr, "%s: ", diff_program_name);
101      fprintf (stderr, format, arg, arg1);
102      fprintf (stderr, "\n");
103    }
104}
105
/* Print an error message containing the string M, then give up.

   Any queued messages are printed first; the message itself is
   reported through diff_error, and DIFF_ABORT (2) is invoked last.  */

void
fatal (m)
     char const *m;
{
  print_message_queue ();
  /* The unused trailing argument is passed as a typed null pointer.
     A bare 0 is an int, which is not a valid pointer argument when
     no prototype for diff_error is in scope.  */
  diff_error ("%s", m, (char const *) 0);
  DIFF_ABORT (2);
}
116
/* Like printf, except if -l in effect then save the message and print later.
   This is used for things like "binary files differ" and "Only in ...".

   This is the two-argument form of message5.  */

void
message (format, arg1, arg2)
     char const *format, *arg1, *arg2;
{
  /* The two unused arguments are passed as typed null pointers.
     A bare 0 is an int, which is not a valid pointer argument when
     no prototype for message5 is in scope.  */
  message5 (format, arg1, arg2, (char const *) 0, (char const *) 0);
}
126
127void
128message5 (format, arg1, arg2, arg3, arg4)
129     char const *format, *arg1, *arg2, *arg3, *arg4;
130{
131  if (paginate_flag)
132    {
133      struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
134      new->format = format;
135      new->arg1 = concat (arg1, "", "");
136      new->arg2 = concat (arg2, "", "");
137      new->arg3 = arg3 ? concat (arg3, "", "") : 0;
138      new->arg4 = arg4 ? concat (arg4, "", "") : 0;
139      new->next = 0;
140      *msg_chain_end = new;
141      msg_chain_end = &new->next;
142    }
143  else
144    {
145      if (sdiff_help_sdiff)
146	write_output (" ", 1);
147      printf_output (format, arg1, arg2, arg3, arg4);
148    }
149}
150
151/* Output all the messages that were saved up by calls to `message'.  */
152
153void
154print_message_queue ()
155{
156  struct msg *m;
157
158  for (m = msg_chain; m; m = m->next)
159    printf_output (m->format, m->arg1, m->arg2, m->arg3, m->arg4);
160}
161
/* State recorded by setup_output for begin_output to use later:
   the names of the two files being compared and the depth.  */

static char const *current_name0;
static char const *current_name1;
static int current_depth;

/* Nonzero from the first begin_output call until finish_output,
   so that begin_output does its work only once in between.  */

static int output_in_progress = 0;

/* Call before outputting the results of comparing files NAME0 and NAME1.

   This only records NAME0, NAME1 and DEPTH.  The actual set-up of the
   output (the header line, and the pipe to `pr' when -l was
   specified) is done by begin_output.  */

void
setup_output (name0, name1, depth)
     char const *name0, *name1;
     int depth;
{
  current_depth = depth;
  current_name0 = name0;
  current_name1 = name1;
}
184
185#if HAVE_FORK && defined (PR_PROGRAM)
186static pid_t pr_pid;
187#endif
188
189void
190begin_output ()
191{
192  char *name;
193
194  if (output_in_progress)
195    return;
196  output_in_progress = 1;
197
198  /* Construct the header of this piece of diff.  */
199  name = xmalloc (strlen (current_name0) + strlen (current_name1)
200		  + strlen (switch_string) + 7);
201  /* Posix.2 section 4.17.6.1.1 specifies this format.  But there is a
202     bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
203     it says that we must print only the last component of the pathnames.
204     This requirement is silly and does not match historical practice.  */
205  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
206
207  if (paginate_flag && callbacks && callbacks->write_output)
208    fatal ("can't paginate when using library callbacks");
209
210  if (paginate_flag)
211    {
212      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
213
214#ifdef PR_PROGRAM
215
216# if HAVE_FORK
217      int pipes[2];
218
219      if (pipe (pipes) != 0)
220	pfatal_with_name ("pipe");
221
222      fflush (stdout);
223
224      pr_pid = vfork ();
225      if (pr_pid < 0)
226	pfatal_with_name ("vfork");
227
228      if (pr_pid == 0)
229	{
230	  close (pipes[1]);
231	  if (pipes[0] != STDIN_FILENO)
232	    {
233	      if (dup2 (pipes[0], STDIN_FILENO) < 0)
234		pfatal_with_name ("dup2");
235	      close (pipes[0]);
236	    }
237
238	  execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, 0);
239	  pfatal_with_name (PR_PROGRAM);
240	}
241      else
242	{
243	  close (pipes[0]);
244	  outfile = fdopen (pipes[1], "w");
245	  if (!outfile)
246	    pfatal_with_name ("fdopen");
247	}
248# else /* ! HAVE_FORK */
249      char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
250      char *p;
251      char const *a = name;
252      sprintf (command, "%s -f -h ", PR_PROGRAM);
253      p = command + strlen (command);
254      SYSTEM_QUOTE_ARG (p, a);
255      *p = 0;
256      outfile = popen (command, "w");
257      if (!outfile)
258	pfatal_with_name (command);
259      free (command);
260# endif /* ! HAVE_FORK */
261#else
262      fatal ("This port does not support the --paginate option to diff.");
263#endif
264    }
265  else
266    {
267
268      /* If -l was not specified, output the diff straight to `stdout'.  */
269
270      /* If handling multiple files (because scanning a directory),
271	 print which files the following output is about.  */
272      if (current_depth > 0)
273	printf_output ("%s\n", name);
274    }
275
276  free (name);
277
278  /* A special header is needed at the beginning of context output.  */
279  switch (output_style)
280    {
281    case OUTPUT_CONTEXT:
282      print_context_header (files, 0);
283      break;
284
285    case OUTPUT_UNIFIED:
286      print_context_header (files, 1);
287      break;
288
289    default:
290      break;
291    }
292}
293
294/* Call after the end of output of diffs for one file.
295   If -l was given, close OUTFILE and get rid of the `pr' subfork.  */
296
297void
298finish_output ()
299{
300  if (paginate_flag && outfile != 0 && outfile != stdout)
301    {
302#ifdef PR_PROGRAM
303      int wstatus, w;
304      if (ferror (outfile))
305	fatal ("write error");
306# if ! HAVE_FORK
307      wstatus = pclose (outfile);
308# else /* HAVE_FORK */
309      if (fclose (outfile) != 0)
310	pfatal_with_name ("write error");
311      while ((w = waitpid (pr_pid, &wstatus, 0)) < 0 && errno == EINTR)
312	;
313      if (w < 0)
314	pfatal_with_name ("waitpid");
315# endif /* HAVE_FORK */
316      if (wstatus != 0)
317	fatal ("subsidiary pr failed");
318#else
319      fatal ("internal error in finish_output");
320#endif
321    }
322
323  output_in_progress = 0;
324}
325
326/* Write something to the output file.  */
327
328void
329write_output (text, len)
330     char const *text;
331     size_t len;
332{
333  if (callbacks && callbacks->write_output)
334    (*callbacks->write_output) (text, len);
335  else if (len == 1)
336    putc (*text, outfile);
337  else
338    fwrite (text, sizeof (char), len, outfile);
339}
340
341/* Printf something to the output file.  */
342
343#if __STDC__
344#define VA_START(args, lastarg) va_start(args, lastarg)
345#else /* ! __STDC__ */
346#define VA_START(args, lastarg) va_start(args)
347#endif /* __STDC__ */
348
349void
350#if __STDC__
351printf_output (const char *format, ...)
352#else
353printf_output (format, va_alist)
354     char const *format;
355     va_dcl
356#endif
357{
358  va_list args;
359
360  VA_START (args, format);
361  if (callbacks && callbacks->write_output)
362    {
363      /* We implement our own limited printf-like functionality (%s, %d,
364	 and %c only).  Callers who want something fancier can use
365	 sprintf.  */
366      const char *p = format;
367      char *q;
368      char *str;
369      int num;
370      int ch;
371      char buf[100];
372
373      while ((q = strchr (p, '%')) != NULL)
374	{
375	  static const char msg[] =
376	    "\ninternal error: bad % in printf_output\n";
377	  (*callbacks->write_output) (p, q - p);
378
379	  switch (q[1])
380	    {
381	    case 's':
382	      str = va_arg (args, char *);
383	      (*callbacks->write_output) (str, strlen (str));
384	      break;
385	    case 'd':
386	      num = va_arg (args, int);
387	      sprintf (buf, "%d", num);
388	      (*callbacks->write_output) (buf, strlen (buf));
389	      break;
390	    case 'c':
391	      ch = va_arg (args, int);
392	      buf[0] = ch;
393	      (*callbacks->write_output) (buf, 1);
394	      break;
395	    default:
396	      (*callbacks->write_output) (msg, sizeof (msg) - 1);
397	      /* Don't just keep going, because q + 1 might point to the
398		 terminating '\0'.  */
399	      goto out;
400	    }
401	  p = q + 2;
402	}
403      (*callbacks->write_output) (p, strlen (p));
404    }
405  else
406    vfprintf (outfile, format, args);
407 out:
408  va_end (args);
409}
410
411/* Flush the output file.  */
412
413void
414flush_output ()
415{
416  if (callbacks && callbacks->flush_output)
417    (*callbacks->flush_output) ();
418  else
419    fflush (outfile);
420}
421
422/* Compare two lines (typically one from each input file)
423   according to the command line options.
424   For efficiency, this is invoked only when the lines do not match exactly
425   but an option like -i might cause us to ignore the difference.
426   Return nonzero if the lines differ.  */
427
428int
429line_cmp (s1, s2)
430     char const *s1, *s2;
431{
432  register unsigned char const *t1 = (unsigned char const *) s1;
433  register unsigned char const *t2 = (unsigned char const *) s2;
434
435  while (1)
436    {
437      register unsigned char c1 = *t1++;
438      register unsigned char c2 = *t2++;
439
440      /* Test for exact char equality first, since it's a common case.  */
441      if (c1 != c2)
442	{
443	  /* Ignore horizontal white space if -b or -w is specified.  */
444
445	  if (ignore_all_space_flag)
446	    {
447	      /* For -w, just skip past any white space.  */
448	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
449	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
450	    }
451	  else if (ignore_space_change_flag)
452	    {
453	      /* For -b, advance past any sequence of white space in line 1
454		 and consider it just one Space, or nothing at all
455		 if it is at the end of the line.  */
456	      if (ISSPACE (c1))
457		{
458		  while (c1 != '\n')
459		    {
460		      c1 = *t1++;
461		      if (! ISSPACE (c1))
462			{
463			  --t1;
464			  c1 = ' ';
465			  break;
466			}
467		    }
468		}
469
470	      /* Likewise for line 2.  */
471	      if (ISSPACE (c2))
472		{
473		  while (c2 != '\n')
474		    {
475		      c2 = *t2++;
476		      if (! ISSPACE (c2))
477			{
478			  --t2;
479			  c2 = ' ';
480			  break;
481			}
482		    }
483		}
484
485	      if (c1 != c2)
486		{
487		  /* If we went too far when doing the simple test
488		     for equality, go back to the first non-white-space
489		     character in both sides and try again.  */
490		  if (c2 == ' ' && c1 != '\n'
491		      && (unsigned char const *) s1 + 1 < t1
492		      && ISSPACE(t1[-2]))
493		    {
494		      --t1;
495		      continue;
496		    }
497		  if (c1 == ' ' && c2 != '\n'
498		      && (unsigned char const *) s2 + 1 < t2
499		      && ISSPACE(t2[-2]))
500		    {
501		      --t2;
502		      continue;
503		    }
504		}
505	    }
506
507	  /* Lowercase all letters if -i is specified.  */
508
509	  if (ignore_case_flag)
510	    {
511	      if (ISUPPER (c1))
512		c1 = tolower (c1);
513	      if (ISUPPER (c2))
514		c2 = tolower (c2);
515	    }
516
517	  if (c1 != c2)
518	    break;
519	}
520      if (c1 == '\n')
521	return 0;
522    }
523
524  return (1);
525}
526
/* Hunk-finding function for print_script: given the script START,
   return the last link that belongs together with its first change.
   Here every change is a hunk by itself, so that is START.  */

struct change *
find_change (start)
     struct change *start;
{
  struct change *last = start;

  return last;
}
536
/* Like find_change: return START itself, so that every change is a
   hunk by itself.  (Presumably meant for scripts that are kept in
   reverse order -- confirm against the callers.)  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  struct change *last = start;

  return last;
}
543
544/* Divide SCRIPT into pieces by calling HUNKFUN and
545   print each piece with PRINTFUN.
546   Both functions take one arg, an edit script.
547
548   HUNKFUN is called with the tail of the script
549   and returns the last link that belongs together with the start
550   of the tail.
551
552   PRINTFUN takes a subscript which belongs together (with a null
553   link at the end) and prints it.  */
554
555void
556print_script (script, hunkfun, printfun)
557     struct change *script;
558     struct change * (*hunkfun) PARAMS((struct change *));
559     void (*printfun) PARAMS((struct change *));
560{
561  struct change *next = script;
562
563  while (next)
564    {
565      struct change *this, *end;
566
567      /* Find a set of changes that belong together.  */
568      this = next;
569      end = (*hunkfun) (next);
570
571      /* Disconnect them from the rest of the changes,
572	 making them a hunk, and remember the rest for next iteration.  */
573      next = end->link;
574      end->link = 0;
575#ifdef DEBUG
576      debug_script (this);
577#endif
578
579      /* Print this hunk.  */
580      (*printfun) (this);
581
582      /* Reconnect the script so it will all be freed properly.  */
583      end->link = next;
584    }
585}
586
587/* Print the text of a single line LINE,
588   flagging it with the characters in LINE_FLAG (which say whether
589   the line is inserted, deleted, changed, etc.).  */
590
591void
592print_1_line (line_flag, line)
593     char const *line_flag;
594     char const * const *line;
595{
596  char const *text = line[0], *limit = line[1]; /* Help the compiler.  */
597  char const *flag_format = 0;
598
599  /* If -T was specified, use a Tab between the line-flag and the text.
600     Otherwise use a Space (as Unix diff does).
601     Print neither space nor tab if line-flags are empty.  */
602
603  if (line_flag && *line_flag)
604    {
605      flag_format = tab_align_flag ? "%s\t" : "%s ";
606      printf_output (flag_format, line_flag);
607    }
608
609  output_1_line (text, limit, flag_format, line_flag);
610
611  if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
612    printf_output ("\n\\ No newline at end of file\n");
613}
614
615/* Output a line from TEXT up to LIMIT.  Without -t, output verbatim.
616   With -t, expand white space characters to spaces, and if FLAG_FORMAT
617   is nonzero, output it with argument LINE_FLAG after every
618   internal carriage return, so that tab stops continue to line up.  */
619
620void
621output_1_line (text, limit, flag_format, line_flag)
622     char const *text, *limit, *flag_format, *line_flag;
623{
624  if (!tab_expand_flag)
625    write_output (text, limit - text);
626  else
627    {
628      register unsigned char c;
629      register char const *t = text;
630      register unsigned column = 0;
631      /* CC is used to avoid taking the address of the register
632         variable C.  */
633      char cc;
634
635      while (t < limit)
636	switch ((c = *t++))
637	  {
638	  case '\t':
639	    {
640	      unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
641	      column += spaces;
642	      do
643		write_output (" ", 1);
644	      while (--spaces);
645	    }
646	    break;
647
648	  case '\r':
649	    write_output ("\r", 1);
650	    if (flag_format && t < limit && *t != '\n')
651	      printf_output (flag_format, line_flag);
652	    column = 0;
653	    break;
654
655	  case '\b':
656	    if (column == 0)
657	      continue;
658	    column--;
659	    write_output ("\b", 1);
660	    break;
661
662	  default:
663	    if (ISPRINT (c))
664	      column++;
665	    cc = c;
666	    write_output (&cc, 1);
667	    break;
668	  }
669    }
670}
671
/* Return the letter describing a change:
   'd' if INSERTS is zero, otherwise 'a' if DELETES is zero,
   and 'c' if both are nonzero.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  if (inserts && deletes)
    return 'c';

  return inserts ? 'a' : 'd';
}
683
684/* Translate an internal line number (an index into diff's table of lines)
685   into an actual line number in the input file.
686   The internal line number is LNUM.  FILE points to the data on the file.
687
688   Internal line numbers count from 0 starting after the prefix.
689   Actual line numbers count from 1 within the entire file.  */
690
691int
692translate_line_number (file, lnum)
693     struct file_data const *file;
694     int lnum;
695{
696  return lnum + file->prefix_lines + 1;
697}
698
/* Translate the internal line number range A through B of FILE into
   actual line numbers, storing the first in *APTR and the last in
   *BPTR.  Each end is derived from the translation of the line just
   outside the range (A - 1 and B + 1 respectively).  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data const *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first, after_last;

  before_first = translate_line_number (file, a - 1);
  *aptr = before_first + 1;

  after_last = translate_line_number (file, b + 1);
  *bptr = after_last - 1;
}
708
/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
   If the two numbers are identical, print just one number.

   Args A and B are internal line numbers.
   We print the translated (real) line numbers.  */

void
print_number_range (sepchar, file, a, b)
     int sepchar;
     struct file_data *file;
     int a, b;
{
  int first, last;

  translate_range (file, a, b, &first, &last);

  /* Note: we can have B < A in the case of a range of no lines.
     In this case, we should print the line number before the range,
     which is B.  */
  if (last <= first)
    printf_output ("%d", last);
  else
    printf_output ("%d%c%d", first, sepchar, last);
}
732
733/* Look at a hunk of edit script and report the range of lines in each file
734   that it applies to.  HUNK is the start of the hunk, which is a chain
735   of `struct change'.  The first and last line numbers of file 0 are stored in
736   *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
737   Note that these are internal line numbers that count from 0.
738
739   If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
740
741   Also set *DELETES nonzero if any lines of file 0 are deleted
742   and set *INSERTS nonzero if any lines of file 1 are inserted.
743   If only ignorable lines are inserted or deleted, both are
744   set to 0.  */
745
746void
747analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
748     struct change *hunk;
749     int *first0, *last0, *first1, *last1;
750     int *deletes, *inserts;
751{
752  int l0, l1, show_from, show_to;
753  int i;
754  int trivial = ignore_blank_lines_flag || ignore_regexp_list;
755  struct change *next;
756
757  show_from = show_to = 0;
758
759  *first0 = hunk->line0;
760  *first1 = hunk->line1;
761
762  next = hunk;
763  do
764    {
765      l0 = next->line0 + next->deleted - 1;
766      l1 = next->line1 + next->inserted - 1;
767      show_from += next->deleted;
768      show_to += next->inserted;
769
770      for (i = next->line0; i <= l0 && trivial; i++)
771	if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
772	  {
773	    struct regexp_list *r;
774	    char const *line = files[0].linbuf[i];
775	    int len = files[0].linbuf[i + 1] - line;
776
777	    for (r = ignore_regexp_list; r; r = r->next)
778	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
779		break;	/* Found a match.  Ignore this line.  */
780	    /* If we got all the way through the regexp list without
781	       finding a match, then it's nontrivial.  */
782	    if (!r)
783	      trivial = 0;
784	  }
785
786      for (i = next->line1; i <= l1 && trivial; i++)
787	if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
788	  {
789	    struct regexp_list *r;
790	    char const *line = files[1].linbuf[i];
791	    int len = files[1].linbuf[i + 1] - line;
792
793	    for (r = ignore_regexp_list; r; r = r->next)
794	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
795		break;	/* Found a match.  Ignore this line.  */
796	    /* If we got all the way through the regexp list without
797	       finding a match, then it's nontrivial.  */
798	    if (!r)
799	      trivial = 0;
800	  }
801    }
802  while ((next = next->link) != 0);
803
804  *last0 = l0;
805  *last1 = l1;
806
807  /* If all inserted or deleted lines are ignorable,
808     tell the caller to ignore this hunk.  */
809
810  if (trivial)
811    show_from = show_to = 0;
812
813  *deletes = show_from;
814  *inserts = show_to;
815}
816
/* Concatenate the three strings S1, S2 and S3,
   returning the result in a newly malloc'd string.  */

char *
concat (s1, s2, s3)
     char const *s1, *s2, *s3;
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  /* Each piece is copied to where the previous one ended;
     the last copy supplies the terminating null.  */
  strcpy (result, s1);
  strcpy (result + len1, s2);
  strcpy (result + len1 + len2, s3);
  return result;
}
828
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.
   A "/" is put between the two, unless the character found by
   filename_lastdirchar is the last character of DIR.  */

char *
dir_file_pathname (dir, file)
     char const *dir, *file;
{
  char const *last = filename_lastdirchar (dir);
  char const *separator = "/";

  /* Nothing follows the last directory character, so DIR already
     ends with a separator.  */
  if (last && !last[1])
    separator = "";

  return concat (dir, separator, file);
}
839
840void
841debug_script (sp)
842     struct change *sp;
843{
844  fflush (stdout);
845  for (; sp; sp = sp->link)
846    fprintf (stderr, "%3d %3d delete %d insert %d\n",
847	     sp->line0, sp->line1, sp->deleted, sp->inserted);
848  fflush (stderr);
849}
850