1/*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2
3/*
4 * Written by Raymond Lai <ray@cyth.net>.
5 * Public domain.
6 */
7
8#include <sys/cdefs.h>
9__FBSDID("$FreeBSD$");
10
11#include <sys/param.h>
12#include <sys/queue.h>
13#include <sys/stat.h>
14#include <sys/types.h>
15#include <sys/wait.h>
16
17#include <ctype.h>
18#include <err.h>
19#include <errno.h>
20#include <fcntl.h>
21#include <getopt.h>
22#include <limits.h>
23#include <paths.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29
30#include "extern.h"
31
/* Default diff program; replaced when --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width, used when -w/--width is not given. */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/*
 * istextfile() stops looking for a NUL byte after roughly this many
 * leading characters of a file.
 */
#define MAX_CHECK 768
43
/*
 * A single diff line: one row of side-by-side output waiting in the
 * diffhead queue.  enqueue() stores the caller's pointers as-is and
 * freediff() later passes left and right to free().
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;				/* left column text, or NULL */
	char	 div;				/* divider character between columns */
	char	*right;				/* right column text, or NULL */
};
51
/* Forward declarations for the file-local helpers defined below. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static int istextfile(FILE *);
static void binexec(char *, char *, char *) __dead2;
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
static void usage(void) __dead2;
static char *xfgets(FILE *);
69
/* Queue of diff lines collected by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* line number of file1 and file2 */
static int Iflag = 0;	/* ignore sets matching regexp */
static int	lflag;		/* print only left column for identical lines */
static int	sflag;		/* skip identical lines */
/* The two objects below have external linkage (not static). */
FILE *outfp;		/* file to save changes to */
const char *tmpdir;	/* TMPDIR or /tmp */
79
/*
 * Codes used in longopts[] for options that have no single-letter
 * form.  Numbering starts above CHAR_MAX so that none of them can be
 * mistaken for a character option code.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};
89
/*
 * Long option table handed to getopt_long() in main().  The last
 * field of each entry is the code main()'s option switch receives.
 */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/*
	 * Options processed by diff.
	 * NOTE(review): "left-column" sits in this group, but main() only
	 * sets lflag for 'l' and does not forward it to diff.
	 */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* Terminating entry. */
	{ NULL,				0,			NULL,	'\0'}
};
118
/*
 * Text printed for --help, one array element per output line.  The
 * list ends with a NULL pointer.  Long option names shown here are
 * kept in step with the longopts[] table.
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t--speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
142
143/*
144 * Create temporary file if source_file is not a regular file.
145 * Returns temporary file name if one was malloced, NULL if unnecessary.
146 */
147static char *
148mktmpcpy(const char *source_file)
149{
150	struct stat sb;
151	ssize_t rcount;
152	int ifd, ofd;
153	u_char buf[BUFSIZ];
154	char *target_file;
155
156	/* Open input and output. */
157	ifd = open(source_file, O_RDONLY, 0);
158	/* File was opened successfully. */
159	if (ifd != -1) {
160		if (fstat(ifd, &sb) == -1)
161			err(2, "error getting file status from %s", source_file);
162
163		/* Regular file. */
164		if (S_ISREG(sb.st_mode)) {
165			close(ifd);
166			return (NULL);
167		}
168	} else {
169		/* If ``-'' does not exist the user meant stdin. */
170		if (errno == ENOENT && strcmp(source_file, "-") == 0)
171			ifd = STDIN_FILENO;
172		else
173			err(2, "error opening %s", source_file);
174	}
175
176	/* Not a regular file, so copy input into temporary file. */
177	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
178		err(2, "asprintf");
179	if ((ofd = mkstemp(target_file)) == -1) {
180		warn("error opening %s", target_file);
181		goto FAIL;
182	}
183	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
184	    rcount != 0) {
185		ssize_t wcount;
186
187		wcount = write(ofd, buf, (size_t)rcount);
188		if (-1 == wcount || rcount != wcount) {
189			warn("error writing to %s", target_file);
190			goto FAIL;
191		}
192	}
193	if (rcount == -1) {
194		warn("error reading from %s", source_file);
195		goto FAIL;
196	}
197
198	close(ifd);
199	close(ofd);
200
201	return (target_file);
202
203FAIL:
204	unlink(target_file);
205	exit(2);
206}
207
208int
209main(int argc, char **argv)
210{
211	FILE *diffpipe=NULL, *file1, *file2;
212	size_t diffargc = 0, wflag = WIDTH;
213	int ch, fd[2] = {-1}, status;
214	pid_t pid=0;
215	const char *outfile = NULL;
216	char **diffargv, *diffprog = diff_path, *filename1, *filename2,
217	     *tmp1, *tmp2, *s1, *s2;
218	int i;
219	char I_arg[] = "-I";
220	char speed_lf[] = "--speed-large-files";
221
222	/*
223	 * Process diff flags.
224	 */
225	/*
226	 * Allocate memory for diff arguments and NULL.
227	 * Each flag has at most one argument, so doubling argc gives an
228	 * upper limit of how many diff args can be passed.  argv[0],
229	 * file1, and file2 won't have arguments so doubling them will
230	 * waste some memory; however we need an extra space for the
231	 * NULL at the end, so it sort of works out.
232	 */
233	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
234		err(2, "main");
235
236	/* Add first argument, the program name. */
237	diffargv[diffargc++] = diffprog;
238
239	/* create a dynamic string for merging single-switch options */
240	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
241		err(2, "main");
242
243	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
244	    longopts, NULL)) != -1) {
245		const char *errstr;
246
247		switch (ch) {
248		/* only compatible --long-name-form with diff */
249		case FCASE_IGNORE_OPT:
250		case FCASE_SENSITIVE_OPT:
251		case STRIPCR_OPT:
252		case TSIZE_OPT:
253		case 'S':
254		break;
255		/* combine no-arg single switches */
256		case 'a':
257		case 'B':
258		case 'b':
259		case 'd':
260		case 'E':
261		case 'i':
262		case 't':
263		case 'W':
264			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
265			/*
266			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
267			 */
268			if (ch == 'W')
269				sprintf(diffargv[1], "%sw", diffargv[1]);
270			else
271				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
272			break;
273		case 'H':
274			diffargv[diffargc++] = speed_lf;
275			break;
276		case DIFFPROG_OPT:
277			diffargv[0] = diffprog = optarg;
278			break;
279		case 'I':
280			Iflag = 1;
281			diffargv[diffargc++] = I_arg;
282			diffargv[diffargc++] = optarg;
283			break;
284		case 'l':
285			lflag = 1;
286			break;
287		case 'o':
288			outfile = optarg;
289			break;
290		case 's':
291			sflag = 1;
292			break;
293		case 'w':
294			wflag = strtonum(optarg, WIDTH_MIN,
295			    INT_MAX, &errstr);
296			if (errstr)
297				errx(2, "width is %s: %s", errstr, optarg);
298			break;
299		case HELP_OPT:
300			for (i = 0; help_msg[i] != NULL; i++)
301				printf("%s\n", help_msg[i]);
302			exit(0);
303			break;
304		default:
305			usage();
306			break;
307		}
308	}
309
310	/* no single switches were used */
311	if (strcmp(diffargv[1], "-") == 0 ) {
312		for ( i = 1; i < argc-1; i++) {
313			diffargv[i] = diffargv[i+1];
314		}
315		diffargv[diffargc-1] = NULL;
316		diffargc--;
317	}
318
319	argc -= optind;
320	argv += optind;
321
322	if (argc != 2)
323		usage();
324
325	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
326		err(2, "could not open: %s", optarg);
327
328	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
329		tmpdir = _PATH_TMP;
330
331	filename1 = argv[0];
332	filename2 = argv[1];
333
334	/*
335	 * Create temporary files for diff and sdiff to share if file1
336	 * or file2 are not regular files.  This allows sdiff and diff
337	 * to read the same inputs if one or both inputs are stdin.
338	 *
339	 * If any temporary files were created, their names would be
340	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
341	 */
342	tmp1 = tmp2 = NULL;
343	/* file1 and file2 are the same, so copy to same temp file. */
344	if (strcmp(filename1, filename2) == 0) {
345		if ((tmp1 = mktmpcpy(filename1)))
346			filename1 = filename2 = tmp1;
347	/* Copy file1 and file2 into separate temp files. */
348	} else {
349		if ((tmp1 = mktmpcpy(filename1)))
350			filename1 = tmp1;
351		if ((tmp2 = mktmpcpy(filename2)))
352			filename2 = tmp2;
353	}
354
355	diffargv[diffargc++] = filename1;
356	diffargv[diffargc++] = filename2;
357	/* Add NULL to end of array to indicate end of array. */
358	diffargv[diffargc++] = NULL;
359
360	/* Subtract column divider and divide by two. */
361	width = (wflag - 3) / 2;
362	/* Make sure line_width can fit in size_t. */
363	if (width > (SIZE_MAX - 3) / 2)
364		errx(2, "width is too large: %zu", width);
365	line_width = width * 2 + 3;
366
367	if (pipe(fd))
368		err(2, "pipe");
369
370	switch (pid = fork()) {
371	case 0:
372		/* child */
373		/* We don't read from the pipe. */
374		close(fd[0]);
375		if (dup2(fd[1], STDOUT_FILENO) == -1)
376			err(2, "child could not duplicate descriptor");
377		/* Free unused descriptor. */
378		close(fd[1]);
379		execvp(diffprog, diffargv);
380		err(2, "could not execute diff: %s", diffprog);
381		break;
382	case -1:
383		err(2, "could not fork");
384		break;
385	}
386
387	/* parent */
388	/* We don't write to the pipe. */
389	close(fd[1]);
390
391	/* Open pipe to diff command. */
392	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
393		err(2, "could not open diff pipe");
394
395	if ((file1 = fopen(filename1, "r")) == NULL)
396		err(2, "could not open %s", filename1);
397	if ((file2 = fopen(filename2, "r")) == NULL)
398		err(2, "could not open %s", filename2);
399	if (!istextfile(file1) || !istextfile(file2)) {
400		/* Close open files and pipe, delete temps */
401		fclose(file1);
402		fclose(file2);
403		if (diffpipe != NULL)
404			fclose(diffpipe);
405		if (tmp1)
406			if (unlink(tmp1))
407				warn("Error deleting %s.", tmp1);
408		if (tmp2)
409			if (unlink(tmp2))
410				warn("Error deleting %s.", tmp2);
411		free(tmp1);
412		free(tmp2);
413		binexec(diffprog, filename1, filename2);
414	}
415	/* Line numbers start at one. */
416	file1ln = file2ln = 1;
417
418	/* Read and parse diff output. */
419	while (parsecmd(diffpipe, file1, file2) != EOF)
420		;
421	fclose(diffpipe);
422
423	/* Wait for diff to exit. */
424	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
425	    WEXITSTATUS(status) >= 2)
426		err(2, "diff exited abnormally.");
427
428	/* Delete and free unneeded temporary files. */
429	if (tmp1)
430		if (unlink(tmp1))
431			warn("Error deleting %s.", tmp1);
432	if (tmp2)
433		if (unlink(tmp2))
434			warn("Error deleting %s.", tmp2);
435	free(tmp1);
436	free(tmp2);
437	filename1 = filename2 = tmp1 = tmp2 = NULL;
438
439	/* No more diffs, so print common lines. */
440	if (lflag)
441		while ((s1 = xfgets(file1)))
442			enqueue(s1, ' ', NULL);
443	else
444		for (;;) {
445			s1 = xfgets(file1);
446			s2 = xfgets(file2);
447			if (s1 || s2)
448				enqueue(s1, ' ', s2);
449			else
450				break;
451		}
452	fclose(file1);
453	fclose(file2);
454	/* Process unmodified lines. */
455	processq();
456
457	/* Return diff exit status. */
458	return (WEXITSTATUS(status));
459}
460
/*
 * When sdiff detects a binary file as input, replaces the current
 * process with diff run on the two files, to maintain the same
 * behavior as GNU sdiff with binary input.
 *
 * diffprog is resolved through PATH with execvp(), the same way
 * main() starts diff, so a bare --diff-program name works here too.
 * Does not return: if the exec fails, the reason is reported and the
 * program exits with status 2, as for the other errors in this file.
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{

	char *args[] = {diffprog, f1, f2, (char *) 0};
	execvp(diffprog, args);

	/* Only reached when execvp() failed; errno says why. */
	err(2, "could not execute diff process");
}
475
476/*
477 * Checks whether a file appears to be a text file.
478 */
479static int
480istextfile(FILE *f)
481{
482	int	ch, i;
483
484	if (f == NULL)
485		return (1);
486	rewind(f);
487	for (i = 0; i <= MAX_CHECK; i++) {
488		ch = fgetc(f);
489		if (ch == '\0') {
490			rewind(f);
491			return (0);
492		}
493		if (ch == EOF)
494			break;
495	}
496	rewind(f);
497	return (1);
498}
499
/*
 * Writes the characters of s to stdout for one column (left or
 * right), stopping at the end of the string or once *col reaches
 * col_max.  *col is the current screen column and is advanced as
 * characters go out; a tab moves it to the next multiple of eight.
 * A tab that would land beyond col_max is not printed and ends the
 * output early.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;

	for (cp = s; *cp != '\0' && *col < col_max; cp++) {
		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up would overflow, so give up. */
			if (*col > SIZE_MAX - 8)
				return;

			/* Next multiple of eight past the cursor. */
			tabstop = *col - (*col % 8) + 8;

			/* The tab does not fit in the column: stop here. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
539
540/*
541 * Prompts user to either choose between two strings or edit one, both,
542 * or neither.
543 */
544static void
545prompt(const char *s1, const char *s2)
546{
547	char *cmd;
548
549	/* Print command prompt. */
550	putchar('%');
551
552	/* Get user input. */
553	for (; (cmd = xfgets(stdin)); free(cmd)) {
554		const char *p;
555
556		/* Skip leading whitespace. */
557		for (p = cmd; isspace(*p); ++p)
558			;
559		switch (*p) {
560		case 'e':
561			/* Skip `e'. */
562			++p;
563			if (eparse(p, s1, s2) == -1)
564				goto USAGE;
565			break;
566		case 'l':
567		case '1':
568			/* Choose left column as-is. */
569			if (s1 != NULL)
570				fprintf(outfp, "%s\n", s1);
571			/* End of command parsing. */
572			break;
573		case 'q':
574			goto QUIT;
575		case 'r':
576		case '2':
577			/* Choose right column as-is. */
578			if (s2 != NULL)
579				fprintf(outfp, "%s\n", s2);
580			/* End of command parsing. */
581			break;
582		case 's':
583			sflag = 1;
584			goto PROMPT;
585		case 'v':
586			sflag = 0;
587			/* FALLTHROUGH */
588		default:
589			/* Interactive usage help. */
590USAGE:
591			int_usage();
592PROMPT:
593			putchar('%');
594
595			/* Prompt user again. */
596			continue;
597		}
598		free(cmd);
599		return;
600	}
601
602	/*
603	 * If there was no error, we received an EOF from stdin, so we
604	 * should quit.
605	 */
606QUIT:
607	fclose(outfp);
608	exit(0);
609}
610
611/*
612 * Takes two strings, separated by a column divider.  NULL strings are
613 * treated as empty columns.  If the divider is the ` ' character, the
614 * second column is not printed (-l flag).  In this case, the second
615 * string must be NULL.  When the second column is NULL, the divider
616 * does not print the trailing space following the divider character.
617 *
618 * Takes into account that tabs can take multiple columns.
619 */
620static void
621println(const char *s1, const char divider, const char *s2)
622{
623	size_t col;
624
625	/* Print first column.  Skips if s1 == NULL. */
626	col = 0;
627	if (s1) {
628		/* Skip angle bracket and space. */
629		printcol(s1, &col, width);
630
631	}
632
633	/* Otherwise, we pad this column up to width. */
634	for (; col < width; ++col)
635		putchar(' ');
636
637	/* Only print left column. */
638	if (divider == ' ' && !s2) {
639		printf(" (\n");
640		return;
641	}
642
643	/*
644	 * Print column divider.  If there is no second column, we don't
645	 * need to add the space for padding.
646	 */
647	if (!s2) {
648		printf(" %c\n", divider);
649		return;
650	}
651	printf(" %c ", divider);
652	col += 3;
653
654	/* Skip angle bracket and space. */
655	printcol(s2, &col, line_width);
656
657	putchar('\n');
658}
659
/*
 * Reads a line from file and returns it as a newly allocated string
 * with the trailing newline, if any, removed.  Returns NULL once end
 * of file is reached.  The returned string must be freed afterwards.
 * A read error terminates the program with status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/* getline() may have allocated a buffer even though it failed. */
		free(s);
		return (NULL);
	}

	if (s[l-1] == '\n')
		s[l-1] = '\0';

	return (s);
}
686
687/*
688 * Parse ed commands from diffpipe and print lines from file1 (lines
689 * to change or delete) or file2 (lines to add or change).
690 * Returns EOF or 0.
691 */
692static int
693parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
694{
695	size_t file1start, file1end, file2start, file2end, n;
696	/* ed command line and pointer to characters in line */
697	char *line, *p, *q;
698	const char *errstr;
699	char c, cmd;
700
701	/* Read ed command. */
702	if (!(line = xfgets(diffpipe)))
703		return (EOF);
704
705	p = line;
706	/* Go to character after line number. */
707	while (isdigit(*p))
708		++p;
709	c = *p;
710	*p++ = 0;
711	file1start = strtonum(line, 0, INT_MAX, &errstr);
712	if (errstr)
713		errx(2, "file1 start is %s: %s", errstr, line);
714
715	/* A range is specified for file1. */
716	if (c == ',') {
717		q = p;
718		/* Go to character after file2end. */
719		while (isdigit(*p))
720			++p;
721		c = *p;
722		*p++ = 0;
723		file1end = strtonum(q, 0, INT_MAX, &errstr);
724		if (errstr)
725			errx(2, "file1 end is %s: %s", errstr, line);
726		if (file1start > file1end)
727			errx(2, "invalid line range in file1: %s", line);
728	} else
729		file1end = file1start;
730
731	cmd = c;
732	/* Check that cmd is valid. */
733	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
734		errx(2, "ed command not recognized: %c: %s", cmd, line);
735
736	q = p;
737	/* Go to character after line number. */
738	while (isdigit(*p))
739		++p;
740	c = *p;
741	*p++ = 0;
742	file2start = strtonum(q, 0, INT_MAX, &errstr);
743	if (errstr)
744		errx(2, "file2 start is %s: %s", errstr, line);
745
746	/*
747	 * There should either be a comma signifying a second line
748	 * number or the line should just end here.
749	 */
750	if (c != ',' && c != '\0')
751		errx(2, "invalid line range in file2: %c: %s", c, line);
752
753	if (c == ',') {
754
755		file2end = strtonum(p, 0, INT_MAX, &errstr);
756		if (errstr)
757			errx(2, "file2 end is %s: %s", errstr, line);
758		if (file2start >= file2end)
759			errx(2, "invalid line range in file2: %s", line);
760	} else
761		file2end = file2start;
762
763	/* Appends happen _after_ stated line. */
764	if (cmd == 'a') {
765		if (file1start != file1end)
766			errx(2, "append cannot have a file1 range: %s",
767			    line);
768		if (file1start == SIZE_MAX)
769			errx(2, "file1 line range too high: %s", line);
770		file1start = ++file1end;
771	}
772	/*
773	 * I'm not sure what the deal is with the line numbers for
774	 * deletes, though.
775	 */
776	else if (cmd == 'd') {
777		if (file2start != file2end)
778			errx(2, "delete cannot have a file2 range: %s",
779			    line);
780		if (file2start == SIZE_MAX)
781			errx(2, "file2 line range too high: %s", line);
782		file2start = ++file2end;
783	}
784
785	/*
786	 * Continue reading file1 and file2 until we reach line numbers
787	 * specified by diff.  Should only happen with -I flag.
788	 */
789	for (; file1ln < file1start && file2ln < file2start;
790	    ++file1ln, ++file2ln) {
791		char *s1, *s2;
792
793		if (!(s1 = xfgets(file1)))
794			errx(2, "file1 shorter than expected");
795		if (!(s2 = xfgets(file2)))
796			errx(2, "file2 shorter than expected");
797
798		/* If the -l flag was specified, print only left column. */
799		if (lflag) {
800			free(s2);
801			/*
802			 * XXX - If -l and -I are both specified, all
803			 * unchanged or ignored lines are shown with a
804			 * `(' divider.  This matches GNU sdiff, but I
805			 * believe it is a bug.  Just check out:
806			 * gsdiff -l -I '^$' samefile samefile.
807			 */
808			if (Iflag)
809				enqueue(s1, '(', NULL);
810			else
811				enqueue(s1, ' ', NULL);
812		} else
813			enqueue(s1, ' ', s2);
814	}
815	/* Ignore deleted lines. */
816	for (; file1ln < file1start; ++file1ln) {
817		char *s;
818
819		if (!(s = xfgets(file1)))
820			errx(2, "file1 shorter than expected");
821
822		enqueue(s, '(', NULL);
823	}
824	/* Ignore added lines. */
825	for (; file2ln < file2start; ++file2ln) {
826		char *s;
827
828		if (!(s = xfgets(file2)))
829			errx(2, "file2 shorter than expected");
830
831		/* If -l flag was given, don't print right column. */
832		if (lflag)
833			free(s);
834		else
835			enqueue(NULL, ')', s);
836	}
837
838	/* Process unmodified or skipped lines. */
839	processq();
840
841	switch (cmd) {
842	case 'a':
843		printa(file2, file2end);
844		n = file2end - file2start + 1;
845		break;
846	case 'c':
847		printc(file1, file1end, file2, file2end);
848		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
849		break;
850	case 'd':
851		printd(file1, file1end);
852		n = file1end - file1start + 1;
853		break;
854	default:
855		errx(2, "invalid diff command: %c: %s", cmd, line);
856	}
857	free(line);
858
859	/* Skip to next ed line. */
860	while (n--) {
861		if (!(line = xfgets(diffpipe)))
862			errx(2, "diff ended early");
863		free(line);
864	}
865
866	return (0);
867}
868
869/*
870 * Queues up a diff line.
871 */
872static void
873enqueue(char *left, char divider, char *right)
874{
875	struct diffline *diffp;
876
877	if (!(diffp = malloc(sizeof(struct diffline))))
878		err(2, "enqueue");
879	diffp->left = left;
880	diffp->div = divider;
881	diffp->right = right;
882	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
883}
884
885/*
886 * Free a diffline structure and its elements.
887 */
888static void
889freediff(struct diffline *diffp)
890{
891
892	free(diffp->left);
893	free(diffp->right);
894	free(diffp);
895}
896
/*
 * Appends `append' to the heap string *s, separated from the existing
 * text by a newline.  A NULL *s simply becomes a copy of `append'.
 * *s is updated to point at the resulting allocation.
 *
 * The pointer and length produced by the previous call are remembered,
 * so repeated appends to the same string do not have to rescan it.
 * Exits with status 2 when memory cannot be allocated.
 */
static void
astrcat(char **s, const char *append)
{
	/* Result of the previous call and its length. */
	static const char *cached_str = NULL;
	static size_t cached_len = 0;
	size_t addlen, total;
	char *grown;

	addlen = strlen(append);

	/* Nothing to append to yet: start with a copy of append. */
	if (*s == NULL) {
		grown = malloc(addlen + 1);
		if (grown == NULL)
			err(2, "astrcat");
		memcpy(grown, append, addlen + 1);

		*s = grown;
		cached_len = addlen;
		cached_str = grown;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached_str != *s)
		cached_len = strlen(*s);

	/* Old text + '\n' + append + '\0'. */
	total = cached_len + 1 + addlen + 1;
	grown = realloc(*s, total);
	if (grown == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text over the old terminator. */
	grown[cached_len] = '\n';
	memcpy(grown + cached_len + 1, append, addlen + 1);

	/* Remember the result for the next call. */
	*s = grown;
	cached_len = total - 1;
	cached_str = grown;
}
961
962/*
963 * Process diff set queue, printing, prompting, and saving each diff
964 * line stored in queue.
965 */
966static void
967processq(void)
968{
969	struct diffline *diffp;
970	char divc, *left, *right;
971
972	/* Don't process empty queue. */
973	if (STAILQ_EMPTY(&diffhead))
974		return;
975
976	/* Remember the divider. */
977	divc = STAILQ_FIRST(&diffhead)->div;
978
979	left = NULL;
980	right = NULL;
981	/*
982	 * Go through set of diffs, concatenating each line in left or
983	 * right column into two long strings, `left' and `right'.
984	 */
985	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
986		/*
987		 * Print changed lines if -s was given,
988		 * print all lines if -s was not given.
989		 */
990		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
991		    diffp->div == '>')
992			println(diffp->left, diffp->div, diffp->right);
993
994		/* Append new lines to diff set. */
995		if (diffp->left)
996			astrcat(&left, diffp->left);
997		if (diffp->right)
998			astrcat(&right, diffp->right);
999	}
1000
1001	/* Empty queue and free each diff line and its elements. */
1002	while (!STAILQ_EMPTY(&diffhead)) {
1003		diffp = STAILQ_FIRST(&diffhead);
1004		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1005		freediff(diffp);
1006	}
1007
1008	/* Write to outfp, prompting user if lines are different. */
1009	if (outfp)
1010		switch (divc) {
1011		case ' ': case '(': case ')':
1012			fprintf(outfp, "%s\n", left);
1013			break;
1014		case '|': case '<': case '>':
1015			prompt(left, right);
1016			break;
1017		default:
1018			errx(2, "invalid divider: %c", divc);
1019		}
1020
1021	/* Free left and right. */
1022	free(left);
1023	free(right);
1024}
1025
1026/*
1027 * Print lines following an (a)ppend command.
1028 */
1029static void
1030printa(FILE *file, size_t line2)
1031{
1032	char *line;
1033
1034	for (; file2ln <= line2; ++file2ln) {
1035		if (!(line = xfgets(file)))
1036			errx(2, "append ended early");
1037		enqueue(NULL, '>', line);
1038	}
1039	processq();
1040}
1041
1042/*
1043 * Print lines following a (c)hange command, from file1ln to file1end
1044 * and from file2ln to file2end.
1045 */
1046static void
1047printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1048{
1049	struct fileline {
1050		STAILQ_ENTRY(fileline)	 fileentries;
1051		char			*line;
1052	};
1053	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1054
1055	/* Read lines to be deleted. */
1056	for (; file1ln <= file1end; ++file1ln) {
1057		struct fileline *linep;
1058		char *line1;
1059
1060		/* Read lines from both. */
1061		if (!(line1 = xfgets(file1)))
1062			errx(2, "error reading file1 in delete in change");
1063
1064		/* Add to delete queue. */
1065		if (!(linep = malloc(sizeof(struct fileline))))
1066			err(2, "printc");
1067		linep->line = line1;
1068		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1069	}
1070
1071	/* Process changed lines.. */
1072	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1073	    ++file2ln) {
1074		struct fileline *del;
1075		char *add;
1076
1077		/* Get add line. */
1078		if (!(add = xfgets(file2)))
1079			errx(2, "error reading add in change");
1080
1081		del = STAILQ_FIRST(&delqhead);
1082		enqueue(del->line, '|', add);
1083		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1084		/*
1085		 * Free fileline structure but not its elements since
1086		 * they are queued up.
1087		 */
1088		free(del);
1089	}
1090	processq();
1091
1092	/* Process remaining lines to add. */
1093	for (; file2ln <= file2end; ++file2ln) {
1094		char *add;
1095
1096		/* Get add line. */
1097		if (!(add = xfgets(file2)))
1098			errx(2, "error reading add in change");
1099
1100		enqueue(NULL, '>', add);
1101	}
1102	processq();
1103
1104	/* Process remaining lines to delete. */
1105	while (!STAILQ_EMPTY(&delqhead)) {
1106		struct fileline *filep;
1107
1108		filep = STAILQ_FIRST(&delqhead);
1109		enqueue(filep->line, '<', NULL);
1110		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1111		free(filep);
1112	}
1113	processq();
1114}
1115
1116/*
1117 * Print deleted lines from file, from file1ln to file1end.
1118 */
1119static void
1120printd(FILE *file1, size_t file1end)
1121{
1122	char *line1;
1123
1124	/* Print out lines file1ln to line2. */
1125	for (; file1ln <= file1end; ++file1ln) {
1126		if (!(line1 = xfgets(file1)))
1127			errx(2, "file1 ended early in delete");
1128		enqueue(line1, '<', NULL);
1129	}
1130	processq();
1131}
1132
/*
 * Prints the list of interactive merge commands to stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit\n";

	fputs(commands, stdout);
}
1150
/*
 * Prints the command-line synopsis to stderr and exits with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1160