sdiff.c revision 298924
1/*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2
3/*
4 * Written by Raymond Lai <ray@cyth.net>.
5 * Public domain.
6 */
7
8#include <sys/cdefs.h>
9__FBSDID("$FreeBSD: head/usr.bin/sdiff/sdiff.c 298924 2016-05-02 15:07:43Z bz $");
10
11#include <sys/param.h>
12#include <sys/queue.h>
13#include <sys/stat.h>
14#include <sys/types.h>
15#include <sys/wait.h>
16
17#include <ctype.h>
18#include <err.h>
19#include <errno.h>
20#include <fcntl.h>
21#include <getopt.h>
22#include <limits.h>
23#include <paths.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <libutil.h>
30
31#include "common.h"
32#include "extern.h"
33
/* Default diff program; --diff-program replaces it in main(). */
#define DIFF_PATH	"/usr/bin/diff"

/* Default total output width; -w/--width replaces it in main(). */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/* Highest character index istextfile() inspects when looking for NUL. */
#define MAX_CHECK 768
45
/*
 * One row of side-by-side output, kept on the diffhead queue until
 * processq() prints it.  freediff() releases both strings.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* queue linkage */
	char	*left;		/* left column text, or NULL */
	char	 div;		/* divider character between the columns */
	char	*right;		/* right column text, or NULL */
};
53
54static void astrcat(char **, const char *);
55static void enqueue(char *, char, char *);
56static char *mktmpcpy(const char *);
57static int istextfile(FILE *);
58static void binexec(char *, char *, char *) __dead2;
59static void freediff(struct diffline *);
60static void int_usage(void);
61static int parsecmd(FILE *, FILE *, FILE *);
62static void printa(FILE *, size_t);
63static void printc(FILE *, size_t, FILE *, size_t);
64static void printcol(const char *, size_t *, const size_t);
65static void printd(FILE *, size_t);
66static void println(const char *, const char, const char *);
67static void processq(void);
68static void prompt(const char *, const char *);
69static void usage(void) __dead2;
70static char *xfgets(FILE *);
71
/* Rows waiting to be printed by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);

/* Output geometry, computed in main() from the -w value. */
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */

/* Next line number to be read from file1 and file2 respectively. */
static size_t file1ln;
static size_t file2ln;

/* Command-line flags. */
static int Iflag;		/* ignore sets matching regexp */
static int lflag;		/* print only left column for identical lines */
static int sflag;		/* skip identical lines */

FILE *outfp;			/* file to save changes to */
const char *tmpdir;		/* TMPDIR or /tmp */
81
/*
 * getopt_long() return codes for options that have no single-letter
 * form.  Numbering starts just above CHAR_MAX so the codes cannot
 * collide with any option character.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	FROMFILE_OPT,
	TOFILE_OPT,
	UNIDIR_OPT,
	STRIPCR_OPT,
	HORIZ_OPT,
	LEFTC_OPT,
	SUPCL_OPT,
	LF_OPT,

	/* the following groupings must be in sequence */
	OLDGF_OPT,
	NEWGF_OPT,
	UNCGF_OPT,
	CHGF_OPT,
	OLDLF_OPT,
	NEWLF_OPT,
	UNCLF_OPT,
	/* end order-sensitive enums */

	TSIZE_OPT,
	HLINES_OPT,
	LFILES_OPT,
	DIFFPROG_OPT,
	PIPE_FD,
	DIFF_PID,	/* pid from the diff parent (if applicable) */

	NOOP_OPT,
};
114
115static struct option longopts[] = {
116	/* options only processed in sdiff */
117	{ "left-column",		no_argument,		NULL,	LEFTC_OPT },
118	{ "suppress-common-lines",	no_argument,		NULL,	's' },
119	{ "width",			required_argument,	NULL,	'w' },
120
121	{ "output",			required_argument,	NULL,	'o' },
122	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },
123
124	{ "pipe-fd",			required_argument,	NULL,	PIPE_FD },
125	{ "diff-pid",			required_argument,	NULL,	DIFF_PID },
126	/* Options processed by diff. */
127	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
128	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
129	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
130	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
131	{ "help",			no_argument,		NULL,	HELP_OPT },
132	{ "text",			no_argument,		NULL,	'a' },
133	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
134	{ "ignore-space-change",	no_argument,		NULL,	'b' },
135	{ "minimal",			no_argument,		NULL,	'd' },
136	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
137	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
138	{ "ignore-case",		no_argument,		NULL,	'i' },
139	{ "expand-tabs",		no_argument,		NULL,	't' },
140	{ "speed-large-files",		no_argument,		NULL,	'H' },
141	{ "ignore-all-space",		no_argument,		NULL,	'W' },
142
143	{ NULL,				0,			NULL,	'\0'}
144};
145
/*
 * Text printed by --help, one entry per output line, terminated by a
 * NULL pointer.  Option names must match the longopts table.
 */
static const char *help_msg[] = {
	"\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"\t-l, --left-column, Only print the left column for identical lines.",
	"\t-o OUTFILE, --output=OUTFILE, Interactively merge file1 and file2 into outfile.",
	"\t-s, --suppress-common-lines, Skip identical lines.",
	"\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.",
	"\tOptions passed to diff(1) are:",
	"\t\t-a, --text, Treat file1 and file2 as text files.",
	"\t\t-b, --ignore-space-change, Ignore trailing blank spaces.",
	"\t\t-d, --minimal, Minimize diff size.",
	"\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.",
	"\t\t-i, --ignore-case, Do a case-insensitive comparison.",
	"\t\t-t, --expand-tabs Expand tabs to spaces.",
	"\t\t-W, --ignore-all-space, Ignore all spaces.",
	"\t\t--speed-large-files, Assume large file with scattered changes.",
	"\t\t--strip-trailing-cr, Strip trailing carriage return.",
	"\t\t--ignore-file-name-case, Ignore case of file names.",
	"\t\t--no-ignore-file-name-case, Do not ignore file name case",
	"\t\t--tabsize NUM, Change size of tabs (default 8.)",

	NULL,
};
168
169/*
170 * Create temporary file if source_file is not a regular file.
171 * Returns temporary file name if one was malloced, NULL if unnecessary.
172 */
173static char *
174mktmpcpy(const char *source_file)
175{
176	struct stat sb;
177	ssize_t rcount;
178	int ifd, ofd;
179	u_char buf[BUFSIZ];
180	char *target_file;
181
182	/* Open input and output. */
183	ifd = open(source_file, O_RDONLY, 0);
184	/* File was opened successfully. */
185	if (ifd != -1) {
186		if (fstat(ifd, &sb) == -1)
187			err(2, "error getting file status from %s", source_file);
188
189		/* Regular file. */
190		if (S_ISREG(sb.st_mode)) {
191			close(ifd);
192			return (NULL);
193		}
194	} else {
195		/* If ``-'' does not exist the user meant stdin. */
196		if (errno == ENOENT && strcmp(source_file, "-") == 0)
197			ifd = STDIN_FILENO;
198		else
199			err(2, "error opening %s", source_file);
200	}
201
202	/* Not a regular file, so copy input into temporary file. */
203	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
204		err(2, "asprintf");
205	if ((ofd = mkstemp(target_file)) == -1) {
206		warn("error opening %s", target_file);
207		goto FAIL;
208	}
209	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
210	    rcount != 0) {
211		ssize_t wcount;
212
213		wcount = write(ofd, buf, (size_t)rcount);
214		if (-1 == wcount || rcount != wcount) {
215			warn("error writing to %s", target_file);
216			goto FAIL;
217		}
218	}
219	if (rcount == -1) {
220		warn("error reading from %s", source_file);
221		goto FAIL;
222	}
223
224	close(ifd);
225	close(ofd);
226
227	return (target_file);
228
229FAIL:
230	unlink(target_file);
231	exit(2);
232}
233
234int
235main(int argc, char **argv)
236{
237	FILE *diffpipe=NULL, *file1, *file2;
238	size_t diffargc = 0, wflag = WIDTH;
239	int ch, fd[2] = {-1}, status;
240	pid_t pid=0; pid_t ppid =-1;
241	const char *outfile = NULL;
242	struct option *popt;
243	char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
244	     *tmp1, *tmp2, *s1, *s2;
245	int i;
246
247	/*
248	 * Process diff flags.
249	 */
250	/*
251	 * Allocate memory for diff arguments and NULL.
252	 * Each flag has at most one argument, so doubling argc gives an
253	 * upper limit of how many diff args can be passed.  argv[0],
254	 * file1, and file2 won't have arguments so doubling them will
255	 * waste some memory; however we need an extra space for the
256	 * NULL at the end, so it sort of works out.
257	 */
258	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
259		err(2, "main");
260
261	/* Add first argument, the program name. */
262	diffargv[diffargc++] = diffprog;
263
264	/* create a dynamic string for merging single-switch options */
265	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
266		err(2, "main");
267
268	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
269	    longopts, NULL)) != -1) {
270		const char *errstr;
271
272		switch (ch) {
273		/* only compatible --long-name-form with diff */
274		case FCASE_IGNORE_OPT:
275		case FCASE_SENSITIVE_OPT:
276		case STRIPCR_OPT:
277		case TSIZE_OPT:
278		case 'S':
279		break;
280		/* combine no-arg single switches */
281		case 'a':
282		case 'B':
283		case 'b':
284		case 'd':
285		case 'E':
286		case 'i':
287		case 't':
288		case 'H':
289		case 'W':
290			for(popt = longopts; ch != popt->val && popt->name != NULL; popt++);
291			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
292			/*
293			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
294			 */
295			if (ch == 'W')
296				sprintf(diffargv[1], "%sw", diffargv[1]);
297			else
298				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
299			break;
300		case DIFFPROG_OPT:
301			diffargv[0] = diffprog = optarg;
302			break;
303		case 'I':
304			Iflag = 1;
305			diffargv[diffargc++] = "-I";
306			diffargv[diffargc++] = optarg;
307			break;
308		case 'l':
309			lflag = 1;
310			break;
311		case 'o':
312			outfile = optarg;
313			break;
314		case 's':
315			sflag = 1;
316			break;
317		case 'w':
318			wflag = strtonum(optarg, WIDTH_MIN,
319			    INT_MAX, &errstr);
320			if (errstr)
321				errx(2, "width is %s: %s", errstr, optarg);
322			break;
323		case DIFF_PID:
324			ppid = strtonum(optarg, 0, INT_MAX, &errstr);
325			if (errstr)
326				errx(2, "diff pid value is %s: %s", errstr, optarg);
327			break;
328		case HELP_OPT:
329			for (i = 0; help_msg[i] != NULL; i++)
330				printf("%s\n", help_msg[i]);
331			exit(0);
332			break;
333		default:
334			usage();
335			break;
336		}
337	}
338
339	/* no single switches were used */
340	if (strcmp(diffargv[1], "-") == 0 ) {
341		for ( i = 1; i < argc-1; i++) {
342			diffargv[i] = diffargv[i+1];
343		}
344		diffargv[diffargc-1] = NULL;
345		diffargc--;
346	}
347
348	argc -= optind;
349	argv += optind;
350
351	if (argc != 2)
352		usage();
353
354	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
355		err(2, "could not open: %s", optarg);
356
357	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
358		tmpdir = _PATH_TMP;
359
360	filename1 = argv[0];
361	filename2 = argv[1];
362
363	/*
364	 * Create temporary files for diff and sdiff to share if file1
365	 * or file2 are not regular files.  This allows sdiff and diff
366	 * to read the same inputs if one or both inputs are stdin.
367	 *
368	 * If any temporary files were created, their names would be
369	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
370	 */
371	tmp1 = tmp2 = NULL;
372	/* file1 and file2 are the same, so copy to same temp file. */
373	if (strcmp(filename1, filename2) == 0) {
374		if ((tmp1 = mktmpcpy(filename1)))
375			filename1 = filename2 = tmp1;
376	/* Copy file1 and file2 into separate temp files. */
377	} else {
378		if ((tmp1 = mktmpcpy(filename1)))
379			filename1 = tmp1;
380		if ((tmp2 = mktmpcpy(filename2)))
381			filename2 = tmp2;
382	}
383
384	diffargv[diffargc++] = filename1;
385	diffargv[diffargc++] = filename2;
386	/* Add NULL to end of array to indicate end of array. */
387	diffargv[diffargc++] = NULL;
388
389	/* Subtract column divider and divide by two. */
390	width = (wflag - 3) / 2;
391	/* Make sure line_width can fit in size_t. */
392	if (width > (SIZE_MAX - 3) / 2)
393		errx(2, "width is too large: %zu", width);
394	line_width = width * 2 + 3;
395
396	if (ppid == -1 ) {
397		if (pipe(fd))
398			err(2, "pipe");
399
400		switch (pid = fork()) {
401		case 0:
402			/* child */
403			/* We don't read from the pipe. */
404			close(fd[0]);
405			if (dup2(fd[1], STDOUT_FILENO) == -1)
406				err(2, "child could not duplicate descriptor");
407			/* Free unused descriptor. */
408			close(fd[1]);
409			execvp(diffprog, diffargv);
410			err(2, "could not execute diff: %s", diffprog);
411			break;
412		case -1:
413			err(2, "could not fork");
414			break;
415		}
416
417		/* parent */
418		/* We don't write to the pipe. */
419		close(fd[1]);
420
421		/* Open pipe to diff command. */
422		if ((diffpipe = fdopen(fd[0], "r")) == NULL)
423			err(2, "could not open diff pipe");
424	}
425	if ((file1 = fopen(filename1, "r")) == NULL)
426		err(2, "could not open %s", filename1);
427	if ((file2 = fopen(filename2, "r")) == NULL)
428		err(2, "could not open %s", filename2);
429	if (!istextfile(file1) || !istextfile(file2)) {
430		/* Close open files and pipe, delete temps */
431		fclose(file1);
432		fclose(file2);
433		fclose(diffpipe);
434		if (tmp1)
435			if (unlink(tmp1))
436				warn("Error deleting %s.", tmp1);
437		if (tmp2)
438			if (unlink(tmp2))
439				warn("Error deleting %s.", tmp2);
440		free(tmp1);
441		free(tmp2);
442		binexec(diffprog, filename1, filename2);
443	}
444	/* Line numbers start at one. */
445	file1ln = file2ln = 1;
446
447	/* Read and parse diff output. */
448	while (parsecmd(diffpipe, file1, file2) != EOF)
449		;
450	fclose(diffpipe);
451
452	/* Wait for diff to exit. */
453	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
454	    WEXITSTATUS(status) >= 2)
455		err(2, "diff exited abnormally.");
456
457	/* Delete and free unneeded temporary files. */
458	if (tmp1)
459		if (unlink(tmp1))
460			warn("Error deleting %s.", tmp1);
461	if (tmp2)
462		if (unlink(tmp2))
463			warn("Error deleting %s.", tmp2);
464	free(tmp1);
465	free(tmp2);
466	filename1 = filename2 = tmp1 = tmp2 = NULL;
467
468	/* No more diffs, so print common lines. */
469	if (lflag)
470		while ((s1 = xfgets(file1)))
471			enqueue(s1, ' ', NULL);
472	else
473		for (;;) {
474			s1 = xfgets(file1);
475			s2 = xfgets(file2);
476			if (s1 || s2)
477				enqueue(s1, ' ', s2);
478			else
479				break;
480		}
481	fclose(file1);
482	fclose(file2);
483	/* Process unmodified lines. */
484	processq();
485
486	/* Return diff exit status. */
487	return (WEXITSTATUS(status));
488}
489
/*
 * When sdiff/zsdiff detects a binary file as input, executes them with
 * diff/zdiff to maintain the same behavior as GNU sdiff with binary input.
 *
 * Replaces the current process with `diffprog f1 f2' and never returns.
 * diffprog is looked up with execvp(), the same way main() starts diff,
 * so a --diff-program given without a path works here too.  If the exec
 * fails the program exits with status 2, the status used for trouble
 * everywhere else in this file (1 would read as "files differ").
 */
static void
binexec(char *diffprog, char *f1, char *f2)
{
	char *args[] = {diffprog, f1, f2, (char *) 0};

	execvp(diffprog, args);

	/* Only reached when execvp() failed; err() appends the reason. */
	err(2, "Could not execute diff process");
}
504
505/*
506 * Checks whether a file appears to be a text file.
507 */
508static int
509istextfile(FILE *f)
510{
511	int	ch, i;
512
513	if (f == NULL)
514		return (1);
515	rewind(f);
516	for (i = 0; i <= MAX_CHECK; i++) {
517		ch = fgetc(f);
518		if (ch == '\0') {
519			rewind(f);
520			return (0);
521		}
522		if (ch == EOF)
523			break;
524	}
525	rewind(f);
526	return (1);
527}
528
/*
 * Write s to stdout, stopping at the end of the string or as soon as
 * the cursor reaches col_max.  *col is the cursor's current screen
 * column and is advanced as characters are written: a tab moves it to
 * the next multiple of eight, anything else moves it by one.  A tab
 * that would carry the cursor past col_max is not printed and ends the
 * output.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp;

	for (cp = s; *cp != '\0' && *col < col_max; cp++) {
		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up would overflow; give up. */
			if (*col > SIZE_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* The tab does not fit in this column. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}
		putchar(*cp);
	}
}
568
569/*
570 * Prompts user to either choose between two strings or edit one, both,
571 * or neither.
572 */
573static void
574prompt(const char *s1, const char *s2)
575{
576	char *cmd;
577
578	/* Print command prompt. */
579	putchar('%');
580
581	/* Get user input. */
582	for (; (cmd = xfgets(stdin)); free(cmd)) {
583		const char *p;
584
585		/* Skip leading whitespace. */
586		for (p = cmd; isspace(*p); ++p)
587			;
588		switch (*p) {
589		case 'e':
590			/* Skip `e'. */
591			++p;
592			if (eparse(p, s1, s2) == -1)
593				goto USAGE;
594			break;
595		case 'l':
596		case '1':
597			/* Choose left column as-is. */
598			if (s1 != NULL)
599				fprintf(outfp, "%s\n", s1);
600			/* End of command parsing. */
601			break;
602		case 'q':
603			goto QUIT;
604		case 'r':
605		case '2':
606			/* Choose right column as-is. */
607			if (s2 != NULL)
608				fprintf(outfp, "%s\n", s2);
609			/* End of command parsing. */
610			break;
611		case 's':
612			sflag = 1;
613			goto PROMPT;
614		case 'v':
615			sflag = 0;
616			/* FALLTHROUGH */
617		default:
618			/* Interactive usage help. */
619USAGE:
620			int_usage();
621PROMPT:
622			putchar('%');
623
624			/* Prompt user again. */
625			continue;
626		}
627		free(cmd);
628		return;
629	}
630
631	/*
632	 * If there was no error, we received an EOF from stdin, so we
633	 * should quit.
634	 */
635QUIT:
636	fclose(outfp);
637	exit(0);
638}
639
640/*
641 * Takes two strings, separated by a column divider.  NULL strings are
642 * treated as empty columns.  If the divider is the ` ' character, the
643 * second column is not printed (-l flag).  In this case, the second
644 * string must be NULL.  When the second column is NULL, the divider
645 * does not print the trailing space following the divider character.
646 *
647 * Takes into account that tabs can take multiple columns.
648 */
649static void
650println(const char *s1, const char div, const char *s2)
651{
652	size_t col;
653
654	/* Print first column.  Skips if s1 == NULL. */
655	col = 0;
656	if (s1) {
657		/* Skip angle bracket and space. */
658		printcol(s1, &col, width);
659
660	}
661
662	/* Otherwise, we pad this column up to width. */
663	for (; col < width; ++col)
664		putchar(' ');
665
666	/* Only print left column. */
667	if (div == ' ' && !s2) {
668		printf(" (\n");
669		return;
670	}
671
672	/*
673	 * Print column divider.  If there is no second column, we don't
674	 * need to add the space for padding.
675	 */
676	if (!s2) {
677		printf(" %c\n", div);
678		return;
679	}
680	printf(" %c ", div);
681	col += 3;
682
683	/* Skip angle bracket and space. */
684	printcol(s2, &col, line_width);
685
686	putchar('\n');
687}
688
/*
 * Read the next line of file with fparseln(), with all three of its
 * delimiter characters set to NUL.  Returns the line as a string the
 * caller must free, or NULL when no line could be read and the stream
 * has no error set.  A read error terminates the program with
 * status 2.
 */
static char *
xfgets(FILE *file)
{
	static const char no_delims[3] = { '\0', '\0', '\0' };
	char *text;

	/* XXX - Is this necessary? */
	clearerr(file);

	text = fparseln(file, NULL, NULL, no_delims, 0);
	if (text == NULL && ferror(file))
		err(2, "error reading file");

	return (text);
}
712
713/*
714 * Parse ed commands from diffpipe and print lines from file1 (lines
715 * to change or delete) or file2 (lines to add or change).
716 * Returns EOF or 0.
717 */
718static int
719parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
720{
721	size_t file1start, file1end, file2start, file2end, n;
722	/* ed command line and pointer to characters in line */
723	char *line, *p, *q;
724	const char *errstr;
725	char c, cmd;
726
727	/* Read ed command. */
728	if (!(line = xfgets(diffpipe)))
729		return (EOF);
730
731	p = line;
732	/* Go to character after line number. */
733	while (isdigit(*p))
734		++p;
735	c = *p;
736	*p++ = 0;
737	file1start = strtonum(line, 0, INT_MAX, &errstr);
738	if (errstr)
739		errx(2, "file1 start is %s: %s", errstr, line);
740
741	/* A range is specified for file1. */
742	if (c == ',') {
743		q = p;
744		/* Go to character after file2end. */
745		while (isdigit(*p))
746			++p;
747		c = *p;
748		*p++ = 0;
749		file1end = strtonum(q, 0, INT_MAX, &errstr);
750		if (errstr)
751			errx(2, "file1 end is %s: %s", errstr, line);
752		if (file1start > file1end)
753			errx(2, "invalid line range in file1: %s", line);
754	} else
755		file1end = file1start;
756
757	cmd = c;
758	/* Check that cmd is valid. */
759	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
760		errx(2, "ed command not recognized: %c: %s", cmd, line);
761
762	q = p;
763	/* Go to character after line number. */
764	while (isdigit(*p))
765		++p;
766	c = *p;
767	*p++ = 0;
768	file2start = strtonum(q, 0, INT_MAX, &errstr);
769	if (errstr)
770		errx(2, "file2 start is %s: %s", errstr, line);
771
772	/*
773	 * There should either be a comma signifying a second line
774	 * number or the line should just end here.
775	 */
776	if (c != ',' && c != '\0')
777		errx(2, "invalid line range in file2: %c: %s", c, line);
778
779	if (c == ',') {
780
781		file2end = strtonum(p, 0, INT_MAX, &errstr);
782		if (errstr)
783			errx(2, "file2 end is %s: %s", errstr, line);
784		if (file2start >= file2end)
785			errx(2, "invalid line range in file2: %s", line);
786	} else
787		file2end = file2start;
788
789	/* Appends happen _after_ stated line. */
790	if (cmd == 'a') {
791		if (file1start != file1end)
792			errx(2, "append cannot have a file1 range: %s",
793			    line);
794		if (file1start == SIZE_MAX)
795			errx(2, "file1 line range too high: %s", line);
796		file1start = ++file1end;
797	}
798	/*
799	 * I'm not sure what the deal is with the line numbers for
800	 * deletes, though.
801	 */
802	else if (cmd == 'd') {
803		if (file2start != file2end)
804			errx(2, "delete cannot have a file2 range: %s",
805			    line);
806		if (file2start == SIZE_MAX)
807			errx(2, "file2 line range too high: %s", line);
808		file2start = ++file2end;
809	}
810
811	/*
812	 * Continue reading file1 and file2 until we reach line numbers
813	 * specified by diff.  Should only happen with -I flag.
814	 */
815	for (; file1ln < file1start && file2ln < file2start;
816	    ++file1ln, ++file2ln) {
817		char *s1, *s2;
818
819		if (!(s1 = xfgets(file1)))
820			errx(2, "file1 shorter than expected");
821		if (!(s2 = xfgets(file2)))
822			errx(2, "file2 shorter than expected");
823
824		/* If the -l flag was specified, print only left column. */
825		if (lflag) {
826			free(s2);
827			/*
828			 * XXX - If -l and -I are both specified, all
829			 * unchanged or ignored lines are shown with a
830			 * `(' divider.  This matches GNU sdiff, but I
831			 * believe it is a bug.  Just check out:
832			 * gsdiff -l -I '^$' samefile samefile.
833			 */
834			if (Iflag)
835				enqueue(s1, '(', NULL);
836			else
837				enqueue(s1, ' ', NULL);
838		} else
839			enqueue(s1, ' ', s2);
840	}
841	/* Ignore deleted lines. */
842	for (; file1ln < file1start; ++file1ln) {
843		char *s;
844
845		if (!(s = xfgets(file1)))
846			errx(2, "file1 shorter than expected");
847
848		enqueue(s, '(', NULL);
849	}
850	/* Ignore added lines. */
851	for (; file2ln < file2start; ++file2ln) {
852		char *s;
853
854		if (!(s = xfgets(file2)))
855			errx(2, "file2 shorter than expected");
856
857		/* If -l flag was given, don't print right column. */
858		if (lflag)
859			free(s);
860		else
861			enqueue(NULL, ')', s);
862	}
863
864	/* Process unmodified or skipped lines. */
865	processq();
866
867	switch (cmd) {
868	case 'a':
869		printa(file2, file2end);
870		n = file2end - file2start + 1;
871		break;
872	case 'c':
873		printc(file1, file1end, file2, file2end);
874		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
875		break;
876	case 'd':
877		printd(file1, file1end);
878		n = file1end - file1start + 1;
879		break;
880	default:
881		errx(2, "invalid diff command: %c: %s", cmd, line);
882	}
883	free(line);
884
885	/* Skip to next ed line. */
886	while (n--) {
887		if (!(line = xfgets(diffpipe)))
888			errx(2, "diff ended early");
889		free(line);
890	}
891
892	return (0);
893}
894
895/*
896 * Queues up a diff line.
897 */
898static void
899enqueue(char *left, char div, char *right)
900{
901	struct diffline *diffp;
902
903	if (!(diffp = malloc(sizeof(struct diffline))))
904		err(2, "enqueue");
905	diffp->left = left;
906	diffp->div = div;
907	diffp->right = right;
908	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
909}
910
911/*
912 * Free a diffline structure and its elements.
913 */
914static void
915freediff(struct diffline *diffp)
916{
917
918	free(diffp->left);
919	free(diffp->right);
920	free(diffp);
921}
922
/*
 * Append second string into first, separated by a newline.  When *s is
 * NULL it simply becomes a copy of append.  *s is (re)allocated as
 * needed and may move; allocation failure terminates the program with
 * status 2.
 *
 * The pointer and length produced by the previous call are remembered,
 * so repeated appends to the same string do not rescan it.
 */
static void
astrcat(char **s, const char *append)
{
	/* Result of the previous call and its length. */
	static const char *cached_str = NULL;
	static size_t cached_len = 0;
	const size_t append_len = strlen(append);
	char *buf;

	/* Nothing to append to yet: start with a copy of append. */
	if (*s == NULL) {
		buf = malloc(append_len + 1);
		if (buf == NULL)
			err(2, "astrcat");
		memcpy(buf, append, append_len + 1);

		*s = buf;
		cached_str = buf;
		cached_len = append_len;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached_str != *s)
		cached_len = strlen(*s);

	/* Room for the old text, '\n', append and the terminating NUL. */
	buf = realloc(*s, cached_len + 1 + append_len + 1);
	if (buf == NULL)
		err(2, "astrcat");

	buf[cached_len] = '\n';
	memcpy(buf + cached_len + 1, append, append_len + 1);

	/* Remember the result for the next call. */
	*s = buf;
	cached_str = buf;
	cached_len += 1 + append_len;
}
987
988/*
989 * Process diff set queue, printing, prompting, and saving each diff
990 * line stored in queue.
991 */
992static void
993processq(void)
994{
995	struct diffline *diffp;
996	char divc, *left, *right;
997
998	/* Don't process empty queue. */
999	if (STAILQ_EMPTY(&diffhead))
1000		return;
1001
1002	/* Remember the divider. */
1003	divc = STAILQ_FIRST(&diffhead)->div;
1004
1005	left = NULL;
1006	right = NULL;
1007	/*
1008	 * Go through set of diffs, concatenating each line in left or
1009	 * right column into two long strings, `left' and `right'.
1010	 */
1011	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1012		/*
1013		 * Print changed lines if -s was given,
1014		 * print all lines if -s was not given.
1015		 */
1016		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1017		    diffp->div == '>')
1018			println(diffp->left, diffp->div, diffp->right);
1019
1020		/* Append new lines to diff set. */
1021		if (diffp->left)
1022			astrcat(&left, diffp->left);
1023		if (diffp->right)
1024			astrcat(&right, diffp->right);
1025	}
1026
1027	/* Empty queue and free each diff line and its elements. */
1028	while (!STAILQ_EMPTY(&diffhead)) {
1029		diffp = STAILQ_FIRST(&diffhead);
1030		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1031		freediff(diffp);
1032	}
1033
1034	/* Write to outfp, prompting user if lines are different. */
1035	if (outfp)
1036		switch (divc) {
1037		case ' ': case '(': case ')':
1038			fprintf(outfp, "%s\n", left);
1039			break;
1040		case '|': case '<': case '>':
1041			prompt(left, right);
1042			break;
1043		default:
1044			errx(2, "invalid divider: %c", divc);
1045		}
1046
1047	/* Free left and right. */
1048	free(left);
1049	free(right);
1050}
1051
1052/*
1053 * Print lines following an (a)ppend command.
1054 */
1055static void
1056printa(FILE *file, size_t line2)
1057{
1058	char *line;
1059
1060	for (; file2ln <= line2; ++file2ln) {
1061		if (!(line = xfgets(file)))
1062			errx(2, "append ended early");
1063		enqueue(NULL, '>', line);
1064	}
1065	processq();
1066}
1067
1068/*
1069 * Print lines following a (c)hange command, from file1ln to file1end
1070 * and from file2ln to file2end.
1071 */
1072static void
1073printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1074{
1075	struct fileline {
1076		STAILQ_ENTRY(fileline)	 fileentries;
1077		char			*line;
1078	};
1079	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1080
1081	/* Read lines to be deleted. */
1082	for (; file1ln <= file1end; ++file1ln) {
1083		struct fileline *linep;
1084		char *line1;
1085
1086		/* Read lines from both. */
1087		if (!(line1 = xfgets(file1)))
1088			errx(2, "error reading file1 in delete in change");
1089
1090		/* Add to delete queue. */
1091		if (!(linep = malloc(sizeof(struct fileline))))
1092			err(2, "printc");
1093		linep->line = line1;
1094		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1095	}
1096
1097	/* Process changed lines.. */
1098	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1099	    ++file2ln) {
1100		struct fileline *del;
1101		char *add;
1102
1103		/* Get add line. */
1104		if (!(add = xfgets(file2)))
1105			errx(2, "error reading add in change");
1106
1107		del = STAILQ_FIRST(&delqhead);
1108		enqueue(del->line, '|', add);
1109		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1110		/*
1111		 * Free fileline structure but not its elements since
1112		 * they are queued up.
1113		 */
1114		free(del);
1115	}
1116	processq();
1117
1118	/* Process remaining lines to add. */
1119	for (; file2ln <= file2end; ++file2ln) {
1120		char *add;
1121
1122		/* Get add line. */
1123		if (!(add = xfgets(file2)))
1124			errx(2, "error reading add in change");
1125
1126		enqueue(NULL, '>', add);
1127	}
1128	processq();
1129
1130	/* Process remaining lines to delete. */
1131	while (!STAILQ_EMPTY(&delqhead)) {
1132		struct fileline *filep;
1133
1134		filep = STAILQ_FIRST(&delqhead);
1135		enqueue(filep->line, '<', NULL);
1136		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1137		free(filep);
1138	}
1139	processq();
1140}
1141
1142/*
1143 * Print deleted lines from file, from file1ln to file1end.
1144 */
1145static void
1146printd(FILE *file1, size_t file1end)
1147{
1148	char *line1;
1149
1150	/* Print out lines file1ln to line2. */
1151	for (; file1ln <= file1end; ++file1ln) {
1152		if (!(line1 = xfgets(file1)))
1153			errx(2, "file1 ended early in delete");
1154		enqueue(line1, '<', NULL);
1155	}
1156	processq();
1157}
1158
/*
 * Print the list of interactive merge commands to stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1176
/*
 * Print the command-line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] "
	    "file1 file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1186